From 7adf4203c2b75447def8483a93d972ed42eb69fc Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+wandb-zacharyblasczyk@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:16:41 -0500 Subject: [PATCH 01/72] fix: Adding missing extra_fqdn support for operator that was supported previously (#197) * fix: Adding extra_fqdn support for operator * extremely cursed code to get around an external dns bug, bumping LB Controller and External DNS * docs update --- README.md | 20 +++++++++++++++++++- main.tf | 17 ++++++++++++++--- modules/app_eks/external_dns/external_dns.tf | 4 +--- modules/app_eks/lb_controller/controller.tf | 2 +- variables.tf | 5 +++-- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 56844eba..2f62840f 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,8 @@ Upgrades must be executed in step-wise fashion from one version to the next. You | [acm\_certificate\_arn](#input\_acm\_certificate\_arn) | The ARN of an existing ACM certificate. | `string` | `null` | no | | [allowed\_inbound\_cidr](#input\_allowed\_inbound\_cidr) | CIDRs allowed to access wandb-server. | `list(string)` | n/a | yes | | [allowed\_inbound\_ipv6\_cidr](#input\_allowed\_inbound\_ipv6\_cidr) | CIDRs allowed to access wandb-server. | `list(string)` | n/a | yes | +| [app\_wandb\_env](#input\_app\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | +| [aws\_loadbalancer\_controller\_tags](#input\_aws\_loadbalancer\_controller\_tags) | (Optional) A map of AWS tags to apply to all resources managed by the load balancer controller | `map(string)` | `{}` | no | | [bucket\_kms\_key\_arn](#input\_bucket\_kms\_key\_arn) | The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted. 
| `string` | `""` | no | | [bucket\_name](#input\_bucket\_name) | n/a | `string` | `""` | no | | [create\_bucket](#input\_create\_bucket) | ######################################### External Bucket # ######################################### Most users will not need these settings. They are ment for users who want a bucket and sqs that are in a different account. | `bool` | `true` | no | @@ -177,14 +179,17 @@ Upgrades must be executed in step-wise fashion from one version to the next. You | [enable\_dummy\_dns](#input\_enable\_dummy\_dns) | Boolean indicating whether or not to enable dummy DNS for the old alb | `bool` | `false` | no | | [enable\_operator\_alb](#input\_enable\_operator\_alb) | Boolean indicating whether to use operatore ALB (true) or not (false). | `bool` | `false` | no | | [external\_dns](#input\_external\_dns) | Using external DNS. A `subdomain` must also be specified if this value is true. | `bool` | `false` | no | -| [extra\_fqdn](#input\_extra\_fqdn) | n/a | `list(string)` | `[]` | no | +| [extra\_fqdn](#input\_extra\_fqdn) | Additional fqdn's must be in the same hosted zone as `domain_name`. | `list(string)` | `[]` | no | | [kms\_key\_alias](#input\_kms\_key\_alias) | KMS key alias for AWS KMS Customer managed key. | `string` | `null` | no | | [kms\_key\_deletion\_window](#input\_kms\_key\_deletion\_window) | Duration in days to destroy the key after it is deleted. Must be between 7 and 30 days. | `number` | `7` | no | | [kms\_key\_policy](#input\_kms\_key\_policy) | The policy that will define the permissions for the kms key. | `string` | `""` | no | +| [kubernetes\_alb\_internet\_facing](#input\_kubernetes\_alb\_internet\_facing) | Indicates whether or not the ALB controlled by the Amazon ALB ingress controller is internet-facing or internal. | `bool` | `true` | no | +| [kubernetes\_alb\_subnets](#input\_kubernetes\_alb\_subnets) | List of subnet ID's the ALB will use for ingress traffic. 
| `list(string)` | `[]` | no | | [kubernetes\_instance\_types](#input\_kubernetes\_instance\_types) | EC2 Instance type for primary node group. | `list(string)` |
[
"m5.large"
]
| no | | [kubernetes\_map\_accounts](#input\_kubernetes\_map\_accounts) | Additional AWS account numbers to add to the aws-auth configmap. | `list(string)` | `[]` | no | | [kubernetes\_map\_roles](#input\_kubernetes\_map\_roles) | Additional IAM roles to add to the aws-auth configmap. |
list(object({
rolearn = string
username = string
groups = list(string)
}))
| `[]` | no | | [kubernetes\_map\_users](#input\_kubernetes\_map\_users) | Additional IAM users to add to the aws-auth configmap. |
list(object({
userarn = string
username = string
groups = list(string)
}))
| `[]` | no | +| [kubernetes\_node\_count](#input\_kubernetes\_node\_count) | Number of nodes | `number` | `2` | no | | [kubernetes\_public\_access](#input\_kubernetes\_public\_access) | Indicates whether or not the Amazon EKS public API server endpoint is enabled. | `bool` | `false` | no | | [kubernetes\_public\_access\_cidrs](#input\_kubernetes\_public\_access\_cidrs) | List of CIDR blocks which can access the Amazon EKS public API server endpoint. | `list(string)` | `[]` | no | | [license](#input\_license) | Weights & Biases license key. | `string` | n/a | yes | @@ -193,17 +198,25 @@ Upgrades must be executed in step-wise fashion from one version to the next. You | [network\_database\_subnet\_cidrs](#input\_network\_database\_subnet\_cidrs) | List of private subnet CIDR ranges to create in VPC. | `list(string)` |
[
"10.10.20.0/24",
"10.10.21.0/24"
]
| no | | [network\_database\_subnets](#input\_network\_database\_subnets) | A list of the identities of the database subnetworks in which resources will be deployed. | `list(string)` | `[]` | no | | [network\_elasticache\_subnet\_cidrs](#input\_network\_elasticache\_subnet\_cidrs) | List of private subnet CIDR ranges to create in VPC. | `list(string)` |
[
"10.10.30.0/24",
"10.10.31.0/24"
]
| no | +| [network\_elasticache\_subnets](#input\_network\_elasticache\_subnets) | A list of the identities of the subnetworks in which elasticache resources will be deployed. | `list(string)` | `[]` | no | | [network\_id](#input\_network\_id) | The identity of the VPC in which resources will be deployed. | `string` | `""` | no | | [network\_private\_subnet\_cidrs](#input\_network\_private\_subnet\_cidrs) | List of private subnet CIDR ranges to create in VPC. | `list(string)` |
[
"10.10.10.0/24",
"10.10.11.0/24"
]
| no | | [network\_private\_subnets](#input\_network\_private\_subnets) | A list of the identities of the private subnetworks in which resources will be deployed. | `list(string)` | `[]` | no | | [network\_public\_subnet\_cidrs](#input\_network\_public\_subnet\_cidrs) | List of private subnet CIDR ranges to create in VPC. | `list(string)` |
[
"10.10.0.0/24",
"10.10.1.0/24"
]
| no | | [network\_public\_subnets](#input\_network\_public\_subnets) | A list of the identities of the public subnetworks in which resources will be deployed. | `list(string)` | `[]` | no | | [other\_wandb\_env](#input\_other\_wandb\_env) | Extra environment variables for W&B | `map(any)` | `{}` | no | +| [parquet\_wandb\_env](#input\_parquet\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | | [private\_link\_allowed\_account\_ids](#input\_private\_link\_allowed\_account\_ids) | List of AWS account IDs allowed to access the VPC Endpoint Service | `list(string)` | `[]` | no | | [public\_access](#input\_public\_access) | Is this instance accessable a public domain. | `bool` | `false` | no | +| [size](#input\_size) | Deployment size | `string` | `null` | no | | [ssl\_policy](#input\_ssl\_policy) | SSL policy to use on ALB listener | `string` | `"ELBSecurityPolicy-FS-1-2-Res-2020-10"` | no | | [subdomain](#input\_subdomain) | Subdomain for accessing the Weights & Biases UI. Default creates record at Route53 Route. | `string` | `null` | no | +| [system\_reserved\_cpu\_millicores](#input\_system\_reserved\_cpu\_millicores) | (Optional) The amount of 'system-reserved' CPU millicores to pass to the kubelet. For example: 100. A value of -1 disables the flag. | `number` | `70` | no | +| [system\_reserved\_ephemeral\_megabytes](#input\_system\_reserved\_ephemeral\_megabytes) | (Optional) The amount of 'system-reserved' ephemeral storage in megabytes to pass to the kubelet. For example: 1000. A value of -1 disables the flag. | `number` | `750` | no | +| [system\_reserved\_memory\_megabytes](#input\_system\_reserved\_memory\_megabytes) | (Optional) The amount of 'system-reserved' memory in megabytes to pass to the kubelet. For example: 100. A value of -1 disables the flag. | `number` | `100` | no | +| [system\_reserved\_pid](#input\_system\_reserved\_pid) | (Optional) The amount of 'system-reserved' process ids [pid] to pass to the kubelet. 
For example: 1000. A value of -1 disables the flag. | `number` | `500` | no | | [use\_internal\_queue](#input\_use\_internal\_queue) | n/a | `bool` | `false` | no | +| [weave\_wandb\_env](#input\_weave\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | | [zone\_id](#input\_zone\_id) | Domain for creating the Weights & Biases subdomain on. | `string` | n/a | yes | ## Outputs @@ -216,14 +229,19 @@ Upgrades must be executed in step-wise fashion from one version to the next. You | [cluster\_id](#output\_cluster\_id) | n/a | | [cluster\_node\_role](#output\_cluster\_node\_role) | n/a | | [database\_connection\_string](#output\_database\_connection\_string) | n/a | +| [database\_instance\_type](#output\_database\_instance\_type) | n/a | | [database\_password](#output\_database\_password) | n/a | | [database\_username](#output\_database\_username) | n/a | +| [eks\_node\_count](#output\_eks\_node\_count) | n/a | +| [eks\_node\_instance\_type](#output\_eks\_node\_instance\_type) | n/a | | [elasticache\_connection\_string](#output\_elasticache\_connection\_string) | n/a | | [internal\_app\_port](#output\_internal\_app\_port) | n/a | | [kms\_key\_arn](#output\_kms\_key\_arn) | The Amazon Resource Name of the KMS key used to encrypt data at rest. | | [network\_id](#output\_network\_id) | The identity of the VPC in which resources are deployed. | | [network\_private\_subnets](#output\_network\_private\_subnets) | The identities of the private subnetworks deployed within the VPC. | | [network\_public\_subnets](#output\_network\_public\_subnets) | The identities of the public subnetworks deployed within the VPC. 
| +| [redis\_instance\_type](#output\_redis\_instance\_type) | n/a | +| [standardized\_size](#output\_standardized\_size) | n/a | | [url](#output\_url) | The URL to the W&B application | diff --git a/main.tf b/main.tf index 0942b8e4..6a90c9e0 100644 --- a/main.tf +++ b/main.tf @@ -152,6 +152,11 @@ module "app_eks" { aws_loadbalancer_controller_tags = var.aws_loadbalancer_controller_tags } +locals { + full_fqdn = var.enable_dummy_dns ? "old.${local.fqdn}" : local.fqdn + extra_fqdn = var.enable_dummy_dns ? [for fqdn in var.extra_fqdn : "old.${fqdn}"] : var.extra_fqdn +} + module "app_lb" { source = "./modules/app_lb" @@ -160,8 +165,8 @@ module "app_lb" { acm_certificate_arn = local.acm_certificate_arn zone_id = var.zone_id - fqdn = var.enable_dummy_dns ? "old.${local.fqdn}" : local.fqdn - extra_fqdn = var.extra_fqdn + fqdn = local.full_fqdn + extra_fqdn = local.extra_fqdn allowed_inbound_cidr = var.allowed_inbound_cidr allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr target_port = local.internal_app_port @@ -270,13 +275,19 @@ module "wandb" { "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF ${join("\\,", var.allowed_inbound_cidr)} EOF - "external-dns.alpha.kubernetes.io/hostname" = var.enable_operator_alb ? local.fqdn : "" "external-dns.alpha.kubernetes.io/ingress-hostname-source" = "annotation-only" "alb.ingress.kubernetes.io/scheme" = var.kubernetes_alb_internet_facing ? "internet-facing" : "internal" "alb.ingress.kubernetes.io/target-type" = "ip" "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn }, + length(var.extra_fqdn) > 0 && var.enable_dummy_dns ? { + "external-dns.alpha.kubernetes.io/hostname" = <<-EOF + ${local.fqdn}\,${join("\\,", var.extra_fqdn)}\,${local.fqdn} + EOF + } : { + "external-dns.alpha.kubernetes.io/hostname" = var.enable_operator_alb ? local.fqdn : "" + }, length(var.kubernetes_alb_subnets) > 0 ? 
{ "alb.ingress.kubernetes.io/subnets" = <<-EOF ${join("\\,", var.kubernetes_alb_subnets)} diff --git a/modules/app_eks/external_dns/external_dns.tf b/modules/app_eks/external_dns/external_dns.tf index 00651b1e..875fc875 100644 --- a/modules/app_eks/external_dns/external_dns.tf +++ b/modules/app_eks/external_dns/external_dns.tf @@ -2,7 +2,7 @@ resource "helm_release" "external_dns" { name = "external-dns" namespace = "kube-system" chart = "external-dns" - version = "1.13.1" + version = "1.14.1" repository = "https://kubernetes-sigs.github.io/external-dns" set { @@ -34,6 +34,4 @@ resource "helm_release" "external_dns" { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" value = aws_iam_role.default.arn } - - } diff --git a/modules/app_eks/lb_controller/controller.tf b/modules/app_eks/lb_controller/controller.tf index 6b9839f4..a28e9406 100644 --- a/modules/app_eks/lb_controller/controller.tf +++ b/modules/app_eks/lb_controller/controller.tf @@ -10,7 +10,7 @@ resource "helm_release" "aws_load_balancer_controller" { repository = "https://aws.github.io/eks-charts" chart = "aws-load-balancer-controller" namespace = "kube-system" - version = "1.6.2" + version = "1.7.2" set { name = "clusterName" diff --git a/variables.tf b/variables.tf index 4765e606..df47571e 100644 --- a/variables.tf +++ b/variables.tf @@ -138,8 +138,9 @@ variable "enable_operator_alb" { } variable "extra_fqdn" { - type = list(string) - default = [] + type = list(string) + description = "Additional fqdn's must be in the same hosted zone as `domain_name`." 
+ default = [] } ########################################## From 26d22eb07767e7fedf5b5708db962af4fc7ec6cb Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 17 Apr 2024 15:17:16 +0000 Subject: [PATCH 02/72] chore(release): version 4.7.1 [skip ci] ### [4.7.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.0...v4.7.1) (2024-04-17) ### Bug Fixes * Adding missing extra_fqdn support for operator that was supported previously ([#197](https://github.com/wandb/terraform-aws-wandb/issues/197)) ([7adf420](https://github.com/wandb/terraform-aws-wandb/commit/7adf4203c2b75447def8483a93d972ed42eb69fc)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 21beb42b..f483278e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.7.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.0...v4.7.1) (2024-04-17) + + +### Bug Fixes + +* Adding missing extra_fqdn support for operator that was supported previously ([#197](https://github.com/wandb/terraform-aws-wandb/issues/197)) ([7adf420](https://github.com/wandb/terraform-aws-wandb/commit/7adf4203c2b75447def8483a93d972ed42eb69fc)) + ## [4.7.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.6.0...v4.7.0) (2024-04-04) From 3ea7ce11b594dafe8b4d59523ca2ad9876b132ce Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+wandb-zacharyblasczyk@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:10:05 -0500 Subject: [PATCH 03/72] fix: Retention Adjustment (#204) * fix: Retention Adjustment --- modules/database/variables.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/database/variables.tf b/modules/database/variables.tf index d6b74261..f170ad99 100644 --- a/modules/database/variables.tf +++ b/modules/database/variables.tf @@ -57,7 +57,7 @@ variable "deletion_protection" { variable "backup_retention_period" { description = "The days to 
retain backups for." type = number - default = 30 + default = 14 } variable "preferred_backup_window" { @@ -114,7 +114,6 @@ variable "innodb_lru_scan_depth" { default = 128 } - # Cluster parametes variable "binlog_row_image" { description = "Value for binlog_row_image" From ae7cb27ff9251c3a94dbe5002aee8a462c8ff2b8 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 19 Apr 2024 18:10:34 +0000 Subject: [PATCH 04/72] chore(release): version 4.7.2 [skip ci] ### [4.7.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.1...v4.7.2) (2024-04-19) ### Bug Fixes * Retention Adjustment ([#204](https://github.com/wandb/terraform-aws-wandb/issues/204)) ([3ea7ce1](https://github.com/wandb/terraform-aws-wandb/commit/3ea7ce11b594dafe8b4d59523ca2ad9876b132ce)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f483278e..e57baa64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.7.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.1...v4.7.2) (2024-04-19) + + +### Bug Fixes + +* Retention Adjustment ([#204](https://github.com/wandb/terraform-aws-wandb/issues/204)) ([3ea7ce1](https://github.com/wandb/terraform-aws-wandb/commit/3ea7ce11b594dafe8b4d59523ca2ad9876b132ce)) + ### [4.7.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.0...v4.7.1) (2024-04-17) From 0ef5828c8278c7fb585598e48197daf6dcbf0317 Mon Sep 17 00:00:00 2001 From: Aastha Gupta <71313011+velotioaastha@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:05:39 +0530 Subject: [PATCH 05/72] feat: Dropping support for MySQL 5.7 (#183) * added changes to remove upgradation changes * Added changes for local naming convention fixes * added changes to remove upgradation changes * Added changes for local naming convention fixes --------- Co-authored-by: Aastha Gupta --- modules/database/main.tf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/database/main.tf b/modules/database/main.tf index 789318ec..c5d7b8bb 100644 --- a/modules/database/main.tf +++ b/modules/database/main.tf @@ -9,9 +9,8 @@ resource "random_string" "master_password" { } locals { - is_mysql_80 = length(regexall("^8.0", var.engine_version)) > 0 - engine_version_tag = local.is_mysql_80 ? "80" : "57" - parameter_family = local.is_mysql_80 ? 
"aurora-mysql8.0" : "aurora-mysql5.7" + engine_version_tag = "80" + parameter_family = "aurora-mysql8.0" parameter_group_name = "${var.namespace}-aurora-db-${local.engine_version_tag}-parameter-group" parameter_cluster_name = "${var.namespace}-aurora-${local.engine_version_tag}-cluster-parameter-group" } From bc232d2fff26b94321d6b0cae81c4b8ed0ad481e Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 23 Apr 2024 15:36:10 +0000 Subject: [PATCH 06/72] chore(release): version 4.8.0 [skip ci] ## [4.8.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.2...v4.8.0) (2024-04-23) ### Features * Dropping support for MySQL 5.7 ([#183](https://github.com/wandb/terraform-aws-wandb/issues/183)) ([0ef5828](https://github.com/wandb/terraform-aws-wandb/commit/0ef5828c8278c7fb585598e48197daf6dcbf0317)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e57baa64..25849d41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.8.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.2...v4.8.0) (2024-04-23) + + +### Features + +* Dropping support for MySQL 5.7 ([#183](https://github.com/wandb/terraform-aws-wandb/issues/183)) ([0ef5828](https://github.com/wandb/terraform-aws-wandb/commit/0ef5828c8278c7fb585598e48197daf6dcbf0317)) + ### [4.7.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.1...v4.7.2) (2024-04-19) From 0a44b43582083832b459822fc4f2af0492f3b4e6 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+wandb-zacharyblasczyk@users.noreply.github.com> Date: Tue, 23 Apr 2024 14:22:04 -0500 Subject: [PATCH 07/72] fix: Update external_dns interval (#203) Default is "1m", but we are hitting API rate limiting. 
--- modules/app_eks/external_dns/external_dns.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/app_eks/external_dns/external_dns.tf b/modules/app_eks/external_dns/external_dns.tf index 875fc875..f23d67d8 100644 --- a/modules/app_eks/external_dns/external_dns.tf +++ b/modules/app_eks/external_dns/external_dns.tf @@ -30,6 +30,11 @@ resource "helm_release" "external_dns" { value = "sync" } + set { + name = "interval" + value = "3m" + } + set { name = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" value = aws_iam_role.default.arn From a2333f4513a2fc50292c913007ca532e40e4e02c Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 23 Apr 2024 19:22:39 +0000 Subject: [PATCH 08/72] chore(release): version 4.8.1 [skip ci] ### [4.8.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.8.0...v4.8.1) (2024-04-23) ### Bug Fixes * Update external_dns interval ([#203](https://github.com/wandb/terraform-aws-wandb/issues/203)) ([0a44b43](https://github.com/wandb/terraform-aws-wandb/commit/0a44b43582083832b459822fc4f2af0492f3b4e6)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25849d41..6a058e24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.8.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.8.0...v4.8.1) (2024-04-23) + + +### Bug Fixes + +* Update external_dns interval ([#203](https://github.com/wandb/terraform-aws-wandb/issues/203)) ([0a44b43](https://github.com/wandb/terraform-aws-wandb/commit/0a44b43582083832b459822fc4f2af0492f3b4e6)) + ## [4.8.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.7.2...v4.8.0) (2024-04-23) From dd4e1fe49a949af461349ee1e5d4bc9306626f90 Mon Sep 17 00:00:00 2001 From: George Scott Date: Tue, 30 Apr 2024 12:36:59 -0500 Subject: [PATCH 09/72] feat: Update default RDS version to 8.0.mysql_aurora.3.05.2 (#209) * mysql version * alphabetize --- CHANGELOG.md | 2 +- README.md | 2 +- main.tf | 10 ++++------ modules/database/variables.tf | 2 +- variables.tf | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a058e24..08551841 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -493,7 +493,7 @@ All notable changes to this project will be documented in this file. ### Bug Fixes -* Set MySQL default version to 8.0.mysql_aurora.3.03.0 ([#63](https://github.com/wandb/terraform-aws-wandb/issues/63)) ([7340b1f](https://github.com/wandb/terraform-aws-wandb/commit/7340b1f8761c4a0edaefbd22e4c4fd61bb8f16af)) +* Set MySQL default version to 8.0.mysql_aurora.3.05.2 ([#63](https://github.com/wandb/terraform-aws-wandb/issues/63)) ([7340b1f](https://github.com/wandb/terraform-aws-wandb/commit/7340b1f8761c4a0edaefbd22e4c4fd61bb8f16af)) ## [1.11.0](https://github.com/wandb/terraform-aws-wandb/compare/v1.10.5...v1.11.0) (2023-02-28) diff --git a/README.md b/README.md index 2f62840f..6efdd719 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ Upgrades must be executed in step-wise fashion from one version to the next. You | [create\_vpc](#input\_create\_vpc) | Boolean indicating whether to deploy a VPC (true) or not (false). 
| `bool` | `true` | no | | [custom\_domain\_filter](#input\_custom\_domain\_filter) | A custom domain filter to be used by external-dns instead of the default FQDN. If not set, the local FQDN is used. | `string` | `null` | no | | [database\_binlog\_format](#input\_database\_binlog\_format) | Specifies the binlog\_format value to set for the database | `string` | `"ROW"` | no | -| [database\_engine\_version](#input\_database\_engine\_version) | Version for MySQL Auora | `string` | `"8.0.mysql_aurora.3.03.0"` | no | +| [database\_engine\_version](#input\_database\_engine\_version) | Version for MySQL Auora | `string` | `"8.0.mysql_aurora.3.05.2"` | no | | [database\_innodb\_lru\_scan\_depth](#input\_database\_innodb\_lru\_scan\_depth) | Specifies the innodb\_lru\_scan\_depth value to set for the database | `number` | `128` | no | | [database\_instance\_class](#input\_database\_instance\_class) | Instance type to use by database master instance. | `string` | `"db.r5.large"` | no | | [database\_master\_username](#input\_database\_master\_username) | Specifies the master\_username value to set for the database | `string` | `"wandb"` | no | diff --git a/main.tf b/main.tf index 6a90c9e0..3b6fc78d 100644 --- a/main.tf +++ b/main.tf @@ -16,14 +16,12 @@ locals { module "file_storage" { count = var.create_bucket ? 
1 : 0 source = "./modules/file_storage" - namespace = var.namespace - + create_queue = !local.use_internal_queue - - sse_algorithm = "aws:kms" - kms_key_arn = local.kms_key_arn - deletion_protection = var.deletion_protection + kms_key_arn = local.kms_key_arn + namespace = var.namespace + sse_algorithm = "aws:kms" } locals { diff --git a/modules/database/variables.tf b/modules/database/variables.tf index f170ad99..555bf9c9 100644 --- a/modules/database/variables.tf +++ b/modules/database/variables.tf @@ -21,7 +21,7 @@ variable "vpc_id" { variable "engine_version" { description = "Version for MySQL Auora to use" type = string - default = "8.0.mysql_aurora.3.03.0" + default = "8.0.mysql_aurora.3.05.2" } variable "create_db_subnet_group" { diff --git a/variables.tf b/variables.tf index df47571e..067fd9f5 100644 --- a/variables.tf +++ b/variables.tf @@ -30,7 +30,7 @@ variable "size" { variable "database_engine_version" { description = "Version for MySQL Auora" type = string - default = "8.0.mysql_aurora.3.03.0" + default = "8.0.mysql_aurora.3.05.2" } variable "database_instance_class" { From 177463561be10eebbd7b0225cd605b71cbfe2305 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 30 Apr 2024 17:37:30 +0000 Subject: [PATCH 10/72] chore(release): version 4.9.0 [skip ci] ## [4.9.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.8.1...v4.9.0) (2024-04-30) ### Features * Update default RDS version to 8.0.mysql_aurora.3.05.2 ([#209](https://github.com/wandb/terraform-aws-wandb/issues/209)) ([dd4e1fe](https://github.com/wandb/terraform-aws-wandb/commit/dd4e1fe49a949af461349ee1e5d4bc9306626f90)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08551841..f3c08cc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.9.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.8.1...v4.9.0) (2024-04-30) + + +### Features + +* Update default RDS version to 8.0.mysql_aurora.3.05.2 ([#209](https://github.com/wandb/terraform-aws-wandb/issues/209)) ([dd4e1fe](https://github.com/wandb/terraform-aws-wandb/commit/dd4e1fe49a949af461349ee1e5d4bc9306626f90)) + ### [4.8.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.8.0...v4.8.1) (2024-04-23) From 0fa5767b47d2612821f4dab3cb589ca3a8fafa2b Mon Sep 17 00:00:00 2001 From: George Scott Date: Wed, 8 May 2024 12:32:13 -0500 Subject: [PATCH 11/72] feat: Set default EKS to 1.26; install vpc-cni add-on (#207) * add AWS EKS add-ons * amendments * example version * add EFS, role for vpc-cni * add coredns * amendment * change names * Update modules/app_eks/add-ons.tf Co-authored-by: Justin Brooks * Update modules/app_eks/add-ons.tf Co-authored-by: Justin Brooks * Update modules/app_eks/add-ons.tf Co-authored-by: Justin Brooks * Update modules/app_eks/add-ons.tf Co-authored-by: Justin Brooks --------- Co-authored-by: Justin Brooks --- examples/public-dns-external/main.tf | 2 +- modules/app_eks/add-ons.tf | 80 ++++++++++++++++++++++++++++ modules/app_eks/main.tf | 28 ---------- 3 files changed, 81 insertions(+), 29 deletions(-) create mode 100644 modules/app_eks/add-ons.tf diff --git a/examples/public-dns-external/main.tf b/examples/public-dns-external/main.tf index 9d1a28f6..0e08a056 100644 --- a/examples/public-dns-external/main.tf +++ b/examples/public-dns-external/main.tf @@ -28,7 +28,7 @@ module "wandb_infra" { allowed_inbound_cidr = var.allowed_inbound_cidr allowed_inbound_ipv6_cidr = ["::/0"] - eks_cluster_version = "1.25" + eks_cluster_version = "1.26" kubernetes_public_access = true kubernetes_public_access_cidrs = ["0.0.0.0/0"] diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf new file mode 100644 index 00000000..90acd485 --- /dev/null +++ b/modules/app_eks/add-ons.tf @@ -0,0 +1,80 @@ + +### IAM policy and 
role for vpc-cni +data "aws_iam_policy_document" "oidc_assume_role" { + statement { + actions = ["sts:AssumeRoleWithWebIdentity"] + effect = "Allow" + + condition { + test = "StringEquals" + variable = "${replace(module.eks.cluster_oidc_issuer_url, "https://", "")}:sub" + values = ["system:serviceaccount:kube-system:aws-node"] + } + + principals { + identifiers = [aws_iam_openid_connect_provider.eks.arn] + type = "Federated" + } + } +} + +resource "aws_iam_role_policy_attachment" "eks_oidc" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + role = aws_iam_role.oidc.name +} + +resource "aws_iam_role" "oidc" { + name = join("-", [var.namespace, "oidc"]) + assume_role_policy = data.aws_iam_policy_document.oidc_assume_role.json +} + + + +### add-ons +resource "aws_eks_addon" "aws_efs_csi_driver" { + depends_on = [ + aws_eks_addon.vpc-cni + ] + cluster_name = var.namespace + addon_name = "aws-efs-csi-driver" + addon_version = "v1.7.7-eksbuild.1" + resolve_conflicts = "OVERWRITE" + } + +resource "aws_eks_addon" "aws_ebs_csi_driver" { + depends_on = [ + aws_eks_addon.vpc_cni + ] + cluster_name = var.namespace + addon_name = "aws-ebs-csi-driver" + addon_version = "v1.25.0-eksbuild.1" + resolve_conflicts = "OVERWRITE" +} + +resource "aws_eks_addon" "coredns" { + depends_on = [ + aws_eks_addon.vpc_cni + ] + cluster_name = var.namespace + addon_name = "coredns" + addon_version = "v1.9.3-eksbuild.11" + resolve_conflicts = "OVERWRITE" +} + +resource "aws_eks_addon" "kube_proxy" { + depends_on = [ + aws_eks_addon.vpc_cni + ] + cluster_name = var.namespace + addon_name = "kube-proxy" + addon_version = "v1.25.14-eksbuild.2" + resolve_conflicts = "OVERWRITE" +} + +resource "aws_eks_addon" "vpc_cni" { + cluster_name = var.namespace + addon_name = "vpc-cni" + addon_version = "v1.18.0-eksbuild.1" + resolve_conflicts = "OVERWRITE" + service_account_role_arn = aws_iam_role.oidc.arn +} \ No newline at end of file diff --git a/modules/app_eks/main.tf 
b/modules/app_eks/main.tf index e0470370..9210ad9d 100644 --- a/modules/app_eks/main.tf +++ b/modules/app_eks/main.tf @@ -14,34 +14,6 @@ locals { } -resource "aws_eks_addon" "eks" { - cluster_name = var.namespace - addon_name = "aws-ebs-csi-driver" - depends_on = [ - module.eks - ] -} - -resource "aws_eks_addon" "efs" { - cluster_name = module.eks.cluster_id - addon_name = "aws-efs-csi-driver" - addon_version = "v1.7.1-eksbuild.1" # Ensure this version is compatible - resolve_conflicts = "OVERWRITE" - depends_on = [ - module.eks - ] -} - -# removed due to conflict with -# AWS Load Balancer Controller -# being installed with Helm. -# See: https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.6/ -#resource "aws_eks_addon" "vpc_cni" { -# cluster_name = var.namespace -# addon_name = "vpc-cni" -# depends_on = [module.eks] -#} - module "eks" { source = "terraform-aws-modules/eks/aws" version = "~> 17.23" From 5b6b82c545e7d77d829286333e41def3ce617e47 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 8 May 2024 17:32:43 +0000 Subject: [PATCH 12/72] chore(release): version 4.10.0 [skip ci] ## [4.10.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.9.0...v4.10.0) (2024-05-08) ### Features * Set default EKS to 1.26; install vpc-cni add-on ([#207](https://github.com/wandb/terraform-aws-wandb/issues/207)) ([0fa5767](https://github.com/wandb/terraform-aws-wandb/commit/0fa5767b47d2612821f4dab3cb589ca3a8fafa2b)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3c08cc5..34a7af2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.10.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.9.0...v4.10.0) (2024-05-08) + + +### Features + +* Set default EKS to 1.26; install vpc-cni add-on ([#207](https://github.com/wandb/terraform-aws-wandb/issues/207)) ([0fa5767](https://github.com/wandb/terraform-aws-wandb/commit/0fa5767b47d2612821f4dab3cb589ca3a8fafa2b)) + ## [4.9.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.8.1...v4.9.0) (2024-04-30) From 1acacda176adb6d645eca2789f7c19f07b5fdc19 Mon Sep 17 00:00:00 2001 From: George Scott Date: Wed, 8 May 2024 14:14:02 -0500 Subject: [PATCH 13/72] typofix (#212) --- modules/app_eks/add-ons.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index 90acd485..52d50e4a 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -33,7 +33,7 @@ resource "aws_iam_role" "oidc" { ### add-ons resource "aws_eks_addon" "aws_efs_csi_driver" { depends_on = [ - aws_eks_addon.vpc-cni + aws_eks_addon.vpc_cni ] cluster_name = var.namespace addon_name = "aws-efs-csi-driver" From 4ab44af5490141f3a50c9cd3589566580862f9a4 Mon Sep 17 00:00:00 2001 From: George Scott Date: Wed, 8 May 2024 14:29:49 -0500 Subject: [PATCH 14/72] fix: Update to readme (#213) * update * update to readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 6efdd719..0bc592c9 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,10 @@ Users can update the EKS cluster version to the latest version offered by AWS. T Upgrades must be executed in step-wise fashion from one version to the next. You cannot skip versions when upgrading EKS. +### Notes on EKS Add-ons +If a terraform apply fails because an add-on is already installed, remove the add-on using the AWS console or the AWS +CLI and re-run the apply. Running pods will not be impacted. 
+ ## Requirements | Name | Version | From f98a053f58ebaf07f248f617770be129ce0cb401 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 8 May 2024 19:33:34 +0000 Subject: [PATCH 15/72] chore(release): version 4.10.1 [skip ci] ### [4.10.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.0...v4.10.1) (2024-05-08) ### Bug Fixes * Update to readme ([#213](https://github.com/wandb/terraform-aws-wandb/issues/213)) ([4ab44af](https://github.com/wandb/terraform-aws-wandb/commit/4ab44af5490141f3a50c9cd3589566580862f9a4)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34a7af2f..9555cdcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.10.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.0...v4.10.1) (2024-05-08) + + +### Bug Fixes + +* Update to readme ([#213](https://github.com/wandb/terraform-aws-wandb/issues/213)) ([4ab44af](https://github.com/wandb/terraform-aws-wandb/commit/4ab44af5490141f3a50c9cd3589566580862f9a4)) + ## [4.10.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.9.0...v4.10.0) (2024-05-08) From a1763f93ef507a99e76940fc8c7a0223b5498ff3 Mon Sep 17 00:00:00 2001 From: George Scott Date: Mon, 13 May 2024 11:41:43 -0500 Subject: [PATCH 16/72] fix: Amend standard sizes (#214) * change tshirt size * changeg --- NOTICE | 4 +++- deployment-size.tf | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/NOTICE b/NOTICE index cfcc8b0e..4b964c25 100644 --- a/NOTICE +++ b/NOTICE @@ -10,4 +10,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. + + . 
\ No newline at end of file diff --git a/deployment-size.tf b/deployment-size.tf index 60a93f2a..f6aedbe6 100644 --- a/deployment-size.tf +++ b/deployment-size.tf @@ -7,19 +7,19 @@ locals { deployment_size = { small = { db = "db.r6g.large", - node_count = 3, + node_count = 2, node_instance = "r6i.xlarge" cache = "cache.m6g.large" }, medium = { db = "db.r6g.xlarge", - node_count = 3, + node_count = 2, node_instance = "r6i.xlarge" cache = "cache.m6g.large" }, large = { db = "db.r6g.2xlarge", - node_count = 3, + node_count = 2, node_instance = "r6i.2xlarge" cache = "cache.m6g.xlarge" }, From d4e01bd35caf0818b7cfa1b218b2d3e5d38fd971 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 13 May 2024 16:42:18 +0000 Subject: [PATCH 17/72] chore(release): version 4.10.2 [skip ci] ### [4.10.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.1...v4.10.2) (2024-05-13) ### Bug Fixes * Amend standard sizes ([#214](https://github.com/wandb/terraform-aws-wandb/issues/214)) ([a1763f9](https://github.com/wandb/terraform-aws-wandb/commit/a1763f93ef507a99e76940fc8c7a0223b5498ff3)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9555cdcb..8a3e8b7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.10.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.1...v4.10.2) (2024-05-13) + + +### Bug Fixes + +* Amend standard sizes ([#214](https://github.com/wandb/terraform-aws-wandb/issues/214)) ([a1763f9](https://github.com/wandb/terraform-aws-wandb/commit/a1763f93ef507a99e76940fc8c7a0223b5498ff3)) + ### [4.10.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.0...v4.10.1) (2024-05-08) From a07a45e6d5b979ec2ef8fbb79b63a5d15867da08 Mon Sep 17 00:00:00 2001 From: Aastha Gupta <71313011+velotioaastha@users.noreply.github.com> Date: Sat, 18 May 2024 10:01:49 +0530 Subject: [PATCH 18/72] feat: Changes to Connect to AWS S3 and KMS using IAM role for EKS service account (#186) * added changes to implement irsa role for service account * addressed comments * Fixed naming conventions --------- Co-authored-by: Aastha Gupta --- modules/app_eks/iam-policies.tf | 20 ++++++++++++++++++++ modules/app_eks/iam-role-attachments.tf | 7 +++++++ modules/app_eks/iam-roles.tf | 25 +++++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/modules/app_eks/iam-policies.tf b/modules/app_eks/iam-policies.tf index 6b0b11c6..6ce0528a 100644 --- a/modules/app_eks/iam-policies.tf +++ b/modules/app_eks/iam-policies.tf @@ -43,3 +43,23 @@ resource "aws_iam_policy" "secrets_manager" { name = "${var.namespace}-secrets-manager" policy = data.aws_iam_policy_document.secrets_manager.json } + +# IAM Policy for IRSA +resource "aws_iam_policy" "irsa" { + name = "${var.namespace}-irsa-policy" + description = "IRSA IAM Policy" + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "s3:*", + "kms:*", + ] + Resource = "*" + } + ] + }) +} diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index e82fe63b..5f0b9ace 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -52,3 +52,10 @@ resource "aws_iam_role_policy_attachment" 
"node_secrets_manager" { role = aws_iam_role.node.name policy_arn = aws_iam_policy.secrets_manager.arn } + +# Attach IRSA Policy to the IRSA Role +resource "aws_iam_policy_attachment" "irsa" { + name = "irsa-policy-attachment" + roles = [aws_iam_role.irsa_role.name] + policy_arn = aws_iam_policy.irsa_policy.arn +} diff --git a/modules/app_eks/iam-roles.tf b/modules/app_eks/iam-roles.tf index dc70d132..9654b4ce 100644 --- a/modules/app_eks/iam-roles.tf +++ b/modules/app_eks/iam-roles.tf @@ -1,4 +1,29 @@ resource "aws_iam_role" "node" { name = "${var.namespace}-node" assume_role_policy = data.aws_iam_policy_document.node_assume.json + +} + +# IAM Role for IRSA +resource "aws_iam_role" "irsa" { + name = "${var.namespace}-irsa-role" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Sid = "" + Effect = "Allow" + Principal = { + Federated = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${aws_iam_openid_connect_provider.eks.url}" + } + Action = "sts:AssumeRoleWithWebIdentity" + Condition = { + StringLike = { + "${aws_iam_openid_connect_provider.eks.url}:sub" = "system:serviceaccount:${var.namespace}:*" + "${aws_iam_openid_connect_provider.eks.url}:aud" = "sts.amazonaws.com" + } + } + } + ] + }) } From 547ab22b1b374979b15f6d1d4a44d5b55353e4ce Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sat, 18 May 2024 04:32:18 +0000 Subject: [PATCH 19/72] chore(release): version 4.11.0 [skip ci] ## [4.11.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.2...v4.11.0) (2024-05-18) ### Features * Changes to Connect to AWS S3 and KMS using IAM role for EKS service account ([#186](https://github.com/wandb/terraform-aws-wandb/issues/186)) ([a07a45e](https://github.com/wandb/terraform-aws-wandb/commit/a07a45e6d5b979ec2ef8fbb79b63a5d15867da08)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a3e8b7c..df212ea5 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.11.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.2...v4.11.0) (2024-05-18) + + +### Features + +* Changes to Connect to AWS S3 and KMS using IAM role for EKS service account ([#186](https://github.com/wandb/terraform-aws-wandb/issues/186)) ([a07a45e](https://github.com/wandb/terraform-aws-wandb/commit/a07a45e6d5b979ec2ef8fbb79b63a5d15867da08)) + ### [4.10.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.1...v4.10.2) (2024-05-13) From 45e1d746f53e78e73e68f911a1f8cad5408e74b6 Mon Sep 17 00:00:00 2001 From: Aastha Gupta <71313011+velotioaastha@users.noreply.github.com> Date: Fri, 24 May 2024 11:08:25 +0530 Subject: [PATCH 20/72] Added typo fixes for irsa role and policy resource name (#215) Co-authored-by: Aastha Gupta --- modules/app_eks/iam-role-attachments.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/app_eks/iam-role-attachments.tf b/modules/app_eks/iam-role-attachments.tf index 5f0b9ace..92f0ff09 100644 --- a/modules/app_eks/iam-role-attachments.tf +++ b/modules/app_eks/iam-role-attachments.tf @@ -56,6 +56,6 @@ resource "aws_iam_role_policy_attachment" "node_secrets_manager" { # Attach IRSA Policy to the IRSA Role resource "aws_iam_policy_attachment" "irsa" { name = "irsa-policy-attachment" - roles = [aws_iam_role.irsa_role.name] - policy_arn = aws_iam_policy.irsa_policy.arn + roles = [aws_iam_role.irsa.name] + policy_arn = aws_iam_policy.irsa.arn } From fbf837991cd26585f98adaace82787b5195fb107 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Thu, 6 Jun 2024 09:40:15 -0500 Subject: [PATCH 21/72] docs: Operator Upgrade Docs (#206) * docs: Operator Upgrade Docs * adding apply output * Egress --- README.md | 11 +- .../images/post-operator-apply.png | Bin 0 -> 25206 bytes .../images/post-operator-k8s.svg | 1 + 
.../images/pre-operator-infra.svg | 1 + .../images/pre-operator-k8s.svg | 1 + .../post-operator.tf.disabled | 113 ++++++++ docs/operator-migration/post-operator.tfvars | 12 + docs/operator-migration/pre-operator.tf | 112 ++++++++ docs/operator-migration/pre-operator.tfvars | 7 + docs/operator-migration/readme.md | 267 ++++++++++++++++++ docs/operator-migration/variables.tf | 145 ++++++++++ 11 files changed, 667 insertions(+), 3 deletions(-) create mode 100644 docs/operator-migration/images/post-operator-apply.png create mode 100644 docs/operator-migration/images/post-operator-k8s.svg create mode 100644 docs/operator-migration/images/pre-operator-infra.svg create mode 100644 docs/operator-migration/images/pre-operator-k8s.svg create mode 100644 docs/operator-migration/post-operator.tf.disabled create mode 100644 docs/operator-migration/post-operator.tfvars create mode 100644 docs/operator-migration/pre-operator.tf create mode 100644 docs/operator-migration/pre-operator.tfvars create mode 100644 docs/operator-migration/readme.md create mode 100644 docs/operator-migration/variables.tf diff --git a/README.md b/README.md index 0bc592c9..3f9de70e 100644 --- a/README.md +++ b/README.md @@ -102,13 +102,14 @@ resources that lack official modules. Users can update the EKS cluster version to the latest version offered by AWS. This can be done using the environment variable `eks_cluster_version`. Note that, cluster and nodegroup version updates can only be done in increments of one version at a time. For example, if your current cluster version is `1.21` and the latest version available is `1.25` - you'd need to: 1. update the cluster version in the app_eks module from `1.21` to `1.22` -2. run `terraform apply` +2. run `terraform apply` 3. update the cluster version to `1.23` 4. run `terraform apply` 5. update the cluster version to `1.24` -...and so on and so forth. + ...and so on and so forth. Upgrades must be executed in step-wise fashion from one version to the next. 
You cannot skip versions when upgrading EKS. + ### Notes on EKS Add-ons @@ -252,7 +253,11 @@ CLI and re-run the apply. Running pods will not be impacted. ## Migrations -#### Upgrading from 3.x -> 4.x +### Upgrading to Operator + +See our upgrade guide [here](./docs/operator-migration/readme.md) + +### Upgrading from 3.x -> 4.x - If egress access for retrieving the wandb/controller image is not available, Terraform apply may experience failures. - It's necessary to supply a license variable within the module, as shown: diff --git a/docs/operator-migration/images/post-operator-apply.png b/docs/operator-migration/images/post-operator-apply.png new file mode 100644 index 0000000000000000000000000000000000000000..9ac05b96f6cab9a8616bfb2728fc50fd9458bfe8 GIT binary patch literal 25206 zcmeFZc{p2L`#;)QOVK{sDvIiW8e3|XsA{RXr6>}qnumy)7~3k^D%z@uX4-}Qw#yLme8+jlk@`r6p= zz`e84qP;R(?{D`b+hlHnMmf~u{P`9xJUwtNj`KqM!1cRA?Ju4yQ`RJItqVzQ9fX3a z<0Ri(kS(DH#43?N)n>(^aJVQB(T;~>(lzD{V_)=pCvj(&cge#_AIZmfTmAX8hu7bp zjPD?}ePeiO4TpmIP{Mw@YpJ|tDFwXzb{M4~Rrmsn4ATIRjPz5%Nvot7wMk@vqeDI1JM5kV zxHOfO0z^$jAKq|t-@UZ1m#bj!irl~S|v$63$DfaQvefwkG_8np$?PtFP*{^;34!(`q$HD&m zi~YKld*EMZIk340|8>0Y{qKg_<~sWN>`!wCUneIoKUbLl?!odA_NDM{_aFE_Fg8+m zfI;Q#9bu20}?&<{~=lS#JHGLhO)y;0{{?ncPAK-$kzrVM-ynIklkX(?W9L(26 z{+gPan*3D-c?AVob_-cQxR<|uu&kHg#lJ7|uj|}$@^kQY^Y(Xxd7b}#UHeC{0Dr)R z3%_sl@4vsF(<#{Pf9~Yv_s_7{1C;-LM*f=IRr!Bko849O_qXbI-GZGwt#7$O+0($sW-E?)tw@{Xe@}_&NFNz@Y4V`UC%GY5wW_zfb^6 zyOw>)M5oB3`~LHo&a)r0<3Eo4zmNIf+5AsJ@?Y8f58C!$P5-Y!`aj$F|Ekb`Rp|d~ z2L9{Q|NkE$Rm+DyQ8-dRUl63*w=+XPi)ZG51>oV9w{p)NQtjxrmrv6BSK#<1`^H}S zMW?MC@IvNeZuzn|PG|&CLn~82ZnjT+YOKt*jM+C5PsbSkSlDCMS3?PXiZ~aO?37sG z4i^?~bSwAu0T7m}$QV?1NVN~##V3D&$^T=!uiR(gKEi6q(g3_y_C&B(Lul^S?3*w1 z4{m&D^e}de$ylj*gLk@psQdO~zY@@fzcaH$xZhZP$bTI~`ZAu4z|8_H$c4Z`q;clm zE=|9M5+(vDr5C93%mxw7j(DH=@-L2-T5f@^-U&%qk{wFDu{Q7kw7^8*_ZH2W({3|I+4gB*YQu#e#$2Ila)&Y zQwz8(L~($i^mtloFhLO@7Z9tLp+8YOp!-xUeCgde4^^oNe&`!n8sTjVrmjsX^8*(f zA4sNsS;$chxE4^>hS@25Bda0fQ 
zIDY;ldtu)4NzTzHxnP5O&Rz9Fa<2m^wpMp#R!$ob=N0UU2bHI>PYV5$L+sLWaJA>y|fs$%8R)(e8n2is8|B zq3PnW`wz^EpP*c;I3R72wgPPpjGW9vnn{HYomP#$75h7aJ)xVc1MRp~41qGq^wh3I z#MRXbqt#=sPFu$n1Ie;lE#^|{7N;Rc?p*ub_~QN+q@63GK5HiuCKvn()DHgD^Ox9x zqtE_I3&z|hDSyz@mhRKJ{Oa%dQuyn6D}B428ZMdtk~VbQ9ni{6*g5aSQzY5Udn(sR z@%8h=CnT>td0(hW)wn!qPV53G{+jvORo8ssZ=-+XB zRaJz$&t&UgY25U+KKyUwd4AylKAfoYp10qL&wnwWyvwZhRQ}rdC&0KYbNAF#EiXdt zLO%HF=zRp5E;-e5B(G&#)KuR}u`kW4zv9vR1PHkXfE2r%a3ITJz8>G@CwKT$<51^1 z*6uP_#HNv)3a0kA%2%E`%Ua=bq|6%5|LW5@j74MWxe(SDZv7knc4mILila1cJbD|K z+aj6}O7OQ1AUp+mCP4on)pxIN+`ErpWWOF$T>$8A$mN0Gp-Vi&-AWg8tO}RRWLxEh zC`xf|?8!EJ>?d>SRGiqulV{;=s@B_NxNqfZq0<#-vES44?Bv7qWtOAW6yUU6go z2L*qJJnW8@KbG8yD=>M%bMe$E@fH|oGEw_Z&u^UGefRvP(D!QwHE#PY?+uIP-(Tc+ zNIe-o*S`f8DDC+6PdxTI^hpaju$yG=@|V!c-Kl$@nk)=s$GiIhS;sTf%#<}7WEjUB z;@FLEygAA?E;pA_y^^DBHw=^DzRhk{QRl|+G8CzC{di`}_3gE53hRUSp5QGgdD6IP zO}5NjYKnEd@c70_t}Hdb6%i$zOqP0t0?y)r$nV8GZ}>#a>J`7N`RBteL14zZ!5GRr zX}!t&^Q(ry_cdO5;M>u!Q1F)};jV9>RXsYtr(II<$*2`VPZ%Bc`pVh&Yt20g&XsY0 zgxl~55lt^7Ce2=i)!(fiuHSu76RoWna}Vxy@R7)YqwhX^c@q`ugefLFwfYr*w8x1M zG)2ay&WEN#lT{*jUruhfCO+Ug6~!r(^oA{O=|}8N-pJKF#J1_49dAp0H`WCP#6&oS zMfPR@BOac4IGwQHQ&PcsY9<6Ye*YOqv;@kx{5o_h=Yc2TC8ctgblMja8Q#-4URW$~ z7w&cXkvLna?nWG_DA&XnTzvmb8yDMEN$82IbYBbzF%`Lx&EcOsZkIh5OuI%qbSHmV z#T0+~(K+o@pV%XhzLp$rj3-HCa|D`dezcw{wH&X8CIt`P$x(GL(UOeMWH<@yZ5;_Q zI+v!3^k~EzbToE7=)+?NYto+y$mtZ!dBrcHT3880pM58y5%2{Tp8AH2Vgfw|8`H=SmA}sKmI)0_WjSPjEQ9eEk?u)I@Q7*!B?!Bw zmYsaIB4M7SI>g$p0dLq=Vvc)dez=?s1j$T_FTy2$^vjSeAg?tYY{UcA zI3H?7{pI;O1=9&`&(27^3Bk|2rQVM+3njRQ^uDfgaq5_nt%QEn9cv1kV9IPXdOuI< zrqPZ`&Z@|**bX*eKjvlnFX1gr(hGDMCDZA&H4ZC0ZM_*8A$JSKf+(1&GgQ#GjmADI zo4H+Qv~Va>PM6-7x&I@?PLlMkZFVVS8d?}$LK{w$2S9ufL;b#`P7JhIl2>+0*3eQ- z60_INQ%sH_8x2I8GmDd9kjW_L|iQGW*<)JLrso`^OvU6#f+CH@jchqW4d z^wdu}X!fczs7q>7u$g*>acTwLbS*1u;5&O#*d z7=h`NIS@2{HGO@T^j_szM8^a7j2b6mcPzgP{XTLxEyK0_%d+~1(1%$ZM$tRnvD4Q( z7#Ty3n<%{4sj}^H?$U+w5dWeQWtDRL&P42AaAU}iT(`?x%UZy9h4(*QKW&0fn?=^T z6s))Xl|$H`Dt-Zl_g&}1P|nf(?wC+j!t0Id=PwM9J^HT&t}PxLvu}I>-Y$mcNe^~R 
zDp<4={OuJQ&y-WtN6~@n^=LXV$Fm{<9}CwCwhWXVpHspF*`R0?p=*jI2W-YZ!*;w# z_DocHVWn33%IPPV9l-8E#U8b*)>2Zf+!}scbNXXd&Z(Z^4oS;bBlQ?*Pv@VD<=|*5J5qrC7{8~M#GMEH zIV-i|IY_BlIZv~NjV@ms?6_|iDc07 zHTSTIU8v*%sSvJkK;2a4ieh{WWP73YSWFnvmKrY4T(%h8d;b-MW)26AwZJ;ZV+lqd zx;8X4aiRWG=kc^d9$mYBJ^*|y$kXY8`tgoTtWm5p)8^IAA&K9M@} zmVjEag_r456`focyOYPKvp)=0uQ0d{BDvr28SBl*P$#_*W}nt=Wv;c1Yxb^|?FGaS zL4cNsJ(iYD>To(c1~USQoavWX4^5;{o96SXbe-2of`uznp^1DOl(j$mYm6Z7SwQe*}BqRWur$UUw8%ogD!K3}qJG_2j5ZPYQ#s_xW zbrJ_-^;{ui)3ixJ`@Td}(vc9$EDxK;jOwX%(V)&*NTXia-Udflq!GE2KYZ~oH;A z1X5~I6&%~FTt3jIOeyO|| zH`y8esPJ(7ZaRZ%YE7kvCUD{wH5(*?dutvmAE}5wnAsdkELj@17N1sVt95^5 zhvwpFL#<8H!{dr+wn2CEy8n?_eUTxi6ixD$n)x~M3y?6e3`?C`{Z7_6Fr8?ERwR&` zv=#NX+9PLvT~n(q@A3~{BFz8zk$y1@kM-C6c*UrJ$qG7TnN{Fr#dW=!ZdCi#X>trX z<*}2@E4Q98`DpyJM|0?Y*NWH7TvIf&=B|lcQ@7@2VaCk8 z;I|k%{^JVq+Q77Xx80X&=~sS{_vV!cMqKVawH#$+U;WbfX*Vqt%h39(J~7~8Qcn_X zYyQmTcjIacPW{+(LMq!6lsAkzFTWo{dA?+M;AIOWtN+n4w!?okrq$&ya*aT2ILa2M zVL}M{723!5p_oNl*f`?Dh7EyEpOiNCNk>MEEuiKqM&7mrW zU=u|sOaPwN>0fg8W^SBYt}B;D zYK!8jJ4?QPDE1oZ5g6dAb8>w_t!f!Rw@_`v%+fG9jID1qYppgtEE#_4Q`QwP`mM4A zKRhvLny3E)!Mj-+YfRQde|DK_SbZH-z9DMsb>l|dbu={8Sk#a5;m+amt!MO2UeIz) zS3eHaF0B4WKxVXgHdg>LG<~@gXe=WBs<9oF6&P&`Yd4GYJL@48?h?Mx{ZzanvFK1X ztB$%Fz9ygay@#1ec6^Qfbu8Uef*+GWCrN3#PJ9eZ_(%-;H9tGICchO_k%ele_&H@O zU)uV-I4bSK`#Wf(@|Uj@K00-g@}gr|Y*!5d5u-GRW+tYjQtJROA25 zz5ml23ZcHhA}fmc>NG`L(WsCjOyIZ;BGlEZqXWSST+x^=Ry zL9rpTqNid&3O$XTNUITBViTgsv=8Fn(eqS1Kc6%c*tgdpSK!mPRa|_lN!wR=>lH(k z=RCwMj;xl_12aaKDbN(HE@>^yS>*i4@@VXf^OMAfb-;5*2S;%J#ZWHeCLw558#`!L zW=N@5KZ(2B&0Ylf5xNPVk_bP6FdveoH>EdUiP#+>DO4tcHeX=oHnhTY-5V#kcFt^X zGP4HQSj8n>@=J{KuG6pZ(K+6R;uF3Cbr^C6CXD~b=Zi0oAF5WV$waZl8rGy$#06_S zS_8vCm^-Y z>HywgW+&&F*%6wn4Ighzq=?Sa^qcN{j?Ol$6?dlEgp~a?1smJN#WH)PM*T}&1vOnj zUuO*CJDnzrNOvO9Kf9cK7kbjyyJUfhX&QNL#C6_rVT-#eCB-5ML8%n0aDWQf;p4<8 z5V9Hx>nr4vpQwFHtHx|LL`E8}Ip~&$oEwdS+?h%XRQl9%Y%@CdfCHLdnJ$9&lSwZq zHoja(82#4e^tN1^=;*5!KI~Ks{-DS*5lT5LI+C(|1n2NV zwtkY}0OJJmu1}6Pc@=mnIJlAtPkLE%?D(u!COU(^jtnonmD+hz4u!_hRpsC4h)az1 
zOYEz@AVH_@XzhIf?P2S>f^o^Ez-Uh{X`|U;2UaFDZnMu%pbqx_T#qVz`Puaq%}Y&| zl-E(VjJOCMPMZ@Gels@+ZOCor{1aAR(7cQbTeXr_{-~Dqin68r=f}G%>01Ma^RPX0Gy(s5LaLmPwy-W}b(*x6074Ji8V4=a7SMHjh2@K>kr$z* zLBIsg#c^A!rg=U@Z*}Ed7)k?h(}WB46SI>oa1|!xR18L;Q3*xEGgg z-ze=qIecCS0=mdjS=K){+d8eJ?tQ$o(JuS1sR@cr^%VdUl|u(bES5nqCvoxUkWs`p z!uEJbI)l=|H_=E@$uW85uu$_eVak1e^NH)>*2Wey^^PMMJC@K8!Ow@MIAFUA7Gvw* z+|>rWS7LSJ1rm1;C?Y5OAA9M_2k%|CX51m$U8?PY$V(&!^NR;5c6jY^9hrE02_p(} z%`V4Jm&%u}g4-=n;e6kZL!NK})eYODU7uB~?ca0wM&A`t!0kocRz!V0S6sf+krT$f z-WWEZ=lmi;DNXHJfNk@)w#kU+O7NNE8sS-`thM z&uaGUN)|YUBqRVezYm7;cfM?b#P!T8taQHU`})#G8})56brDcMT^Z1|;sYS? zz}5sfM{_0g>y6)e(wU^skKfhT!;alCiyLdg4`E{XM>^A!TpZ^neFi7i`&_sz!1)%Q zr)NUE41>0sg%`AQ>DaOqoP|#2Iq$Qww%xb!KsuK0U<;oPdK*dJ*0Hq>WG4u4b^{e{EjM#k zH-up20iL+^7}hF!7aDilGV>|b1I+}I`9E9A1d5E<3M#L7Rd|Vug4(n7)m!D7*1oEg zCKGD_8kZXnXf<}Q%AESE#YPG(0hQm?3G|*9$VyeoEMS0v-R1^grH4I+lN65p1*PZ^ z5ud1DQXuEZpEgBHkIF9%*H=Fkm-JML_Lbko=@9vG-QLjeS7|hScfWOhpD80maqNCQ z@QwC-0|L|av~w8g+hYkw&7gv$u(dLy=Px>k)_p3w&T~&1Ye+el$<95u1mc)(62>~O zVu6fIOS{-!J`0=k>mxYn`6V<>RMD6V%1WOGEE(&h4Sj~S1rBJH=gvlqOJQps29LoO zLQ4fhGK#}uO&$u!70&)ZsD_nk-)5H?{7Z_DPX)VXHtdqFk@!JW>Pz=MPCQ?1qP29N zk|_tC^40|%fp!gYOfq(Os=bh<-dx@#-$X3vpoByK?XN{t^@OF$?oAb=t_*Qr6^^-7 z*aZCB-?3I?oS&WPw+r1}kJmCta=U7__(Neu5)Sa-ysxyl!$77I-p(VxYOeu@YgLB68(k}w}IGQB7C9o`e82})6HDEY_c=AUzzuQicrg5o9gq{*Mc7`JySQ%X0; zgq3eo9)~H2kAXK-%B=-VaW{Xhzx11WekKgP-coR&PGt_f@ltQ%2&f%uXr)Z!DOEA` zPAh1I+7AUJX}U13f<=!6v{YT|0PqBhUz)jP9NEw3HJ<{KQvsGwv1@+4XydBGutX{W?PIzIov}#tL-&6s zWjirSX|QKzQ-J!DPprJAePO)npm*iGY9e7imIdyNyp-7mo9DsMgv=>&;?#Wl*mP_EZzwP7-G) z{UokltNzmHV%fcuv*}joVXzn<1L}906iXU)ugDM0oO@9|Egcm31N~RBn!QwClF$Gl zYFu(aerUaro~88A<`-XNT;!fj92=>`h%}I`koFmSEq=agXscA8rslGSo;m{RVaV#S z#qiPZ8qV#&pKM9{_I^=Gv1UfIVfuXzIxCFK`s5T-`{TDoG+ol{4RUXT`%b?-!QHvD zHU)Pr|3a|JTqyOp=Yx0^v@>hy)N77;E2(Zsu z89h`AJyG^eRe|Y*GT#GB53m}qB6zy4UK5@4uB_Yk?J7UcL$3ReP<@i{g=aNsqT@)T zf$1X;ED^j%&lWUKYYJovE~VC%7@1}lP&qjK0c^qfW39^eC4)TXv1 zXHnaGkEi-f3?@^Wa|>GaK)`lIPRdC4nQC%*u7T9V789ZJu>18F$?0uMF|lcY2Q(^^ 
zEBn0WGgf{*>v|__gh}FxwBO6y(tR#k*&^$pj!0ajsEj*#xvRQ0&Tep$Wbc^Tv~!kS zoi)%s#ughomuh>p$L>||GTKe*jb~n+0Pwtt*a{Ok<|#kd_aPT(vckNBsX*qhbu`&9 zUcDByI=4{=UG{~}(Wi5!u8h+yu`ie1pa#rxTdS3dMzKWezA;l)hA%iWZA zw;|+joE?yEaKl=)aM%S94q)NxJ`JQUY4?A5O1&wo5`NPs`bY^i;T5UAuWjLm4|v13 z#-$gjViP+xW#$ecfyX(cQ3#HY5^9mXNe$Q_FeEe|9v}MR3S^B7vof-4^!{)eabWU+ zwBM7e0$ZTh{WsjjySr9lXN@V&<)Rb%8I=$ZW>jD!8_L zTf0%RE@RZwJqZ!8X_V-C6_WFaooQ! z33OP6K+q}alK)PhZ1eHzy{@RkwMe}vyrkRlI($u<;XHIz(f<;VtMS!@gua0BNS<>H zR^%d>BPi8fv9F#0>-{VQg~c%Mz$PaGym`q>8q?K;a_H4$Q*+=s3hB3**xn6Hhmrf7 zoxdKgj?K@HO)#h;1-yp`=GU^^KJeq&X^+b#Z6kQ) zO}UU}W_`ff>~51}6gi|pCG19JQ4TUC1fsN>{y;kkFjF|=-LJ@A zi*&tn-RYnqz6Q-cFoLkFcgoM~@FQ#Zw8yJGKAVWCFmj?W`Wm6}_)l8&foo16zB)kg&uPv*WQjiy@)8TvXIHmr%1&kU64N*Vwj360KI)?XNw^ z&fzbhgTU>SwZ;Vn0*RG~jppyS?^DJS&KHZ?$rQCd{aD~w&n&%8nXNhy`;uJTS}|;C zaF=REnvfrI;b+L&(*hkKRn+$#nF-cH%dE0A<^0!pT6JN#*1|^r~t{jN{ZJ#rEzE@JH)Qy+SuyX9#}G60=- zpylsi+hna#5~kk{mML$LqCGcMGtXnELPdyjSn4A+6u3Y!FqydZuA=mjf1;^XNRt(w zWr-lC7AMf(^(N`*@oW-CC4y$&gv)a2yu~GzYfIenZCGckMnS=4(HwR((Qg9>Ixajv zlT36dqeV7CaCM|)@8mDcPQy`XE%~7qU9fg6XM3QyS$Y7?OE=;`p_uX z{?_krdKD-KJzHwQFNqIW2^$VZ!sJhQVw!k$oz9Zt5a(Zn1)2G1K zr!IQ~65qG0-#fG}cF4BB@>$$oSYYw(-(W@-Sp$t+IYvLjXAaC-tA`k-gMZb@QEfp8 zg1##oeRb8x%_~_siTGS`k@EkS*OBR{Bl|n=K6gf#pSuoZzK+xD6lO%04HG)0+pW!! 
zr$3J!DqTLx*|9MpJsKdBAj!^$-y6&%sk1CS`6UD+u)Wz6vl*8Mn7v)5ylL1R_MRm}PuQdEGR6Y2tLv(LgZDJOy;y+jf zx=nFe_bbi2exVvpt-LvbS@b2bCqk~=Lyne>;@t$PKAGjtHFEaUUnJhNsf0pPu-#W8YqL8WZ37(aYjjL&OADIS=cm;ZpWMjquJP#{(Sj5 zH016P&vM&J2pmtOUFsiGR3W=fR!%gJF-FMXuqGe`lL(Xx*zfSe7r=|AnOqk+ zhW7sQwhV=y^#x0>`-ptjNTZ15CX*t!X*qE`_KsY9UfJ%M!fpsm{l&|w(2os~dv-;I zFCho}ui`)FFEC7jH-WPWn1Ei4Yz}kdWun{a8C7?|wDPddX;nxp^0|X|0AXhQLK&VG z)>vI$tySo^;J@4My%2)H0x-4ahy4pPjGpq~R=;Thhuo8MyFZybKd*Vj$!vzbz%YBM zcUdO|7S4W#{hXXsO;$WziI9}*XutzGt6(&`ZND^NUR4&TBmyJrk$mNP5{gL(Cf}K& ziPAf-?xm_<9%u1!r}NLSMa@P|5#m9~H!PLDoTDrnPPs+)Ef%eUbu{`uagJ&D$>mNt zeYr?mt5E$};SfGJit~(VP7i^%N|MP_WpV}KpdRS27W1exsLGVTL~nyw@JoZILmStp z<2$-dv8mTbu$nj=f67CS8I4pYf(7f1vNjmaFfBRx9hUift--2e;}hMkk4Gnyvl<&=T>(IaF}n%~ zRz}i3RYj^Sy|`3Yk$95tg01YCft2f^c*;9O{#I=o zVmr6=@sS?E%R6%C(z525jXzB{Bt`*NFModTJJ>kCeQYa9&xou~yIuh4zoyvwEW>VN z-b5H%HX~^y=-Xn%UXtG|om%8yWs^6r%{8$}s6_K8v9+QynNu|*$IA!89bp3Meji|n zn!LW}Wchb&aJUK<_hQ+0$e<^-NsQsmO!>DWt)Wchl(LS{Ha99P%M%`vOnlT90pN9~ zP}4~l($fRtpd7s&URo#A6&8uV;+%z z`n}pvD=4`pZW`bbWRNy8j5>MBJ0`GTtn3@wqts-laY0ilgm2V(yUtc<=E!}%)9bC| zVDlTTaalwe7I(^w8WOU*b*LEffYD?=EOz2x?bYoYTVpSAP2}2IvS#;Y|ZYCM?4Q&e#c^M-e*S{1;~rb ztlb4c;Zy#uJmE8KXVizBch^bR{0N@Kns`F0b}~_EYFnsde2}c*m5NtNW!6%77uu}O zFBVNzxnSFsSJ^oY(4+chL{m>tlv@_y%OFn)aS`~Wi>%bn(oV@e-GJaYc1MGy?e^R!!lzkDgitJHqapN`*s%t7D77v6=3EoX>)1y7C2G%rxX9P$g_ zKyKgZKYL1A)MmM6?171fd*kpY3JC%ShV`BO&DP99wz#=Ot&8BWL76-9=VT1$oCwS= zfrH5LM8DK2OL1Hp$%Cz6db}bPz?}~c?}yMRp!ug!c}N*EVJBf#|3_sg;dPg2^LLk( z2Kx)T)82W_^g;%X*!CG+Q}Jx~vL41rGdTk1NV@jE4u%TQ4G7xnK7q&69QK390t#1Q z_2vSr$MmHw_q(tch@G}?e%w@ZDc{}pC*k>JLW>p~Cv^86U-Gq<&Uq@;r!*~DrnjMD z&Gm$3EOIF{6;|b3JaLz0em_FG>D9!1K(7;OXX7w#R<{3Q?o0*T)ZA0?cXj-74CZ%1 zPvy}1LnJpm>RZ3zPUxYkQVe|(+1d=^a+SYf6X?bzEn+957Z!sPxY~wV`C7_?W(F!? 
z=fmSLG!27?)hNMWjV}pRd1Z^)=Gd#zbMIpfCdi8+7l2o<-g)&}TE4eEfq^*Bq(g-S7c| zSFn9NoBL_@!pq8_AWVABoFg}(0%;!| zo-wB!mtG3@s*QV8x7_yKd0UF|`3~1D%66b*>f)eAI5zE(UHlMUe)E_^v68ZJwU!xp z1AP2mStFG@-4XG*Rw*GPjMaupCbre1lS)4{rF)P9_6V%-prqdKY*NF9V}o<2vZW=q zVkeviIQaR=BD;3p6?ls#G&)zVSg zzV(BU5MIC?cPgAVEl#3DhRWra3CbHrDq({rc96Vv8_x4h@6&(&Td@-7n&VcoWXE5R z%P;=YG7Ts3MpUIEIG*?*#R_DHIVUIx%D*yNII0w8xkE0A{A zYVs+DbtaVbu$-*%sAM+bd^20ohZYil5JYF>SRPX4YtaWo-1v*hMDm-f(8yQ$BCx5L|1 zyeis%D$#CLwb({Z;A-tA40F!l!wL$Lu+*a^A&w*sQO#+$*1CHtCL3uN&3ag@#{%w- zNePCY#{!}nL;C{40a}9IiMfEzO@KY{r!zI2ls<6Q#Q$$nMA}q2pu@j##dU|PD!X3R z@27;EVHj4|7;h2D5FvjmNXTB#rYd+5Zq38n*>$gyYgt!k-*QA>5luf`{hZBaSkcxP zx^IxUdQcNT{DKe0jZ$IQj{OtZQ|jc+3K$BE#agn>(6trwS;;!a*gIRJ-73t1*r_7k z)rslk1~%o%3O*dnUU~3Blh}70n?knv7fG`u0Bf}sIy-|-6INq_usfuB1Z(0}w%0;$Ckm4k#=1M{q zd$Ybn3gbOhPK#@s?koZ$v58tINAGBdLepq+q=kNXoWSNXA;Dosb_&yWRPZk$^H~JBK?6T1JaFC$ z_D)jAK1>9^|!&O$^>0cDyPcxTGA%++owRxtc}n zf?3D0GVS(}WF^{0)1F!Nw<4VacdY-Gu_t@q5257yx}ro&>|GzW#5Ae|%hdabm>yb3 zJAN2S4@ExW`G*UzFncMp8oxmmpY08}jdp?UwYoSi@R#vXiY^%Sb6(gzZ%dsx-7*9AG)V(5^tDDtUtoe8_CJC`E%d}+eo2BzNp$<(;Nt+ z)b@7lYlD~wMcq#NUWn~nL0^Cd2%D5KPhVOJDeDd$S+0DsSOo&wT+DGMCumP&$m)+VfWG~t?#3_4OF`3fABkY!3Zr)+qjXGhfSx{C#^Y!+c z98E{bQ}t21mlx;2pz`ttXU*gm(F#-e*x*~nO28UIG~$*epz(g7P)C#yqthn|(y~gn z-wnz5HuM##68+ghMs9biSb;^u+f=De-QO3We3*}%1HK$PLVe5L*3@+u7m#~q?nv}U z=k1wCJnT|N3w_yLz_0ff0sJ1^IKAGY;aG6xm-S{`>c@fEj;AYoa#5joa*pjdEq=Us z6HE@fcwLcQbgu0+>B*$0OY==xc&XrN3xIgvty|=jHjWW$3LkfAxmx2B)dh`oH>jtR z5V`UF$n=i4ug1rXtr10Or}g38j!(*;&W7V|S8n_q;r__3+sYkmlIY`V(Ot?xn1+-B z>ic%{?Q$-iyFNADOZ}N08x~YUCO7b;K5AkNv|CE_;xar$Z8JJs`_&wrmh|!g>7=+H zt}%ZnJ%lMUqqm9)I31S+qwQ?=RuO;A+E!M^hpsRc<;){*#(Qmr%+F32LHHF*Omh0; z$CXi!sPvor*}Fs*g}vFOcark=$Vu;T&frNF@&%95Tam^@vb^7(T>Z&_t&lD(%ZhRW z8fG*6QCzKPxHy7idb5SMOGqoSZN0woo_kNrclJ(;PImsN#zr~mnB44$e`8}=Ltl_M zt8geKd#h;d1=)HijC9pOCG$vOgf`}|$o3po(}hfR+9@rF3;_%yxP^@PH;otU44b}b zIU$GBbfp52V#O%}+?;BsbnedjW51sYejmq5IpDGy<}X)HB=Y@S>=n`qzjtICf5KyV zYYHLB%PGlM>Gj<~9Ph{b?ysD=wGdSIE4S⩔+8F29d*f*xM=Tpw0x9x;q4}BjVjF 
zQGs2b^imJW&N!m}uKWHqVF5Q(*j>lpeCLMjubfd!=(nvC%Z z4M(G2g%V-_?@!Ow=0@w={Jb_^P-e^3M`ixn8&w`~pB_ma=_Fz8PBzlX86DWuJ$2)E zdn9Cerd@?MT=B_&W(}rV&`# zmAG45v^XI)5bj-1>~C?vArfnwn@2*C*!!tItV>r4k{Ibn;M%4ajdy=s9)vzHwT__P zRC5-j1}ITEj07G*H%}|S8Q)x=G3~98D?>LpUykwb_Iw~g*6lU=r{EjZ544Z)f5Oh> z6ZpGi$x0N^?GRssbv*y>{J^3#zpF9Zv>xuZDRu=xu(p>AAM+)o#fAo^AF-|_^fL{3 z*vX-;6lx$8PXjOfY;0v^_P_?zA?IbS?rh7cpnF@Nv3V9!B`%zY(t8XtWe; z^`(ZIEZcW?1%sL!f7?~R|C?Ps_55}nMH^g1Ht$V;*0SF;lv`mc=8SstDlU~%c%HfW zzP)AKzPQO)4Dnpw*_)lLPJ+cP(KnwsTmP#1Cd!Z9#N0HUlDIpA~ay^Nl<-;b$Nq8k5Hz$O9>#JX*_XRtx(NJw*Z#!6*Ymwcl3d~^}{cF{k{1oNq2 zs_Y5&zMzxSMWLae0;}HYA3ev=%*`A!n&GS}vAf^uA0^^!n=Y4RJUL!j&{b_0aMBX^ zS@M20I}voe1_a_C>X;o&V099*vgT1m(ET{8qU!Ra@MPvHfK@_sa9i-&}z(c`VcrMUtNa$f~0oG=g`$0k(ZTA z%m7gP&N=}gsUG+sPluLp@Wtx6s2MQRTeIWCF@vC`FaS`wmsccIAt2k6Ag?mJDyQOr zAzmoi!Est|<`exYuDDD{(*2g_BjHB=B~A5*gkv1K0Odr0nhz#uS`jpNa?$+S!~qm_ z2LZ&Fqz>*z{Dg?6Iyi%7uYR4UhxPJdS}#?!Dafr{vAJ|UR`hR-h-jU%tj!$sqvo0J zebe6sw+kz`+B|r#<5|zi6}g@z(2BZdwk`y*=67qEGhlnH>-B#;)}Yo-4bp|3Ks=LU znxi^AqbT}+wRhfMO)Xm-w_v$ZFDjsd6b0!5Djf+RAktApK&euN7ZJ%skQ!P*iqex%BQ-#PBt&XZAfdbyz4yMSzCXcV&RS=ZOlJ1%oSE6Pzn_3)yjl-_ zpyw8iSgk!OebNn_*(B=4l~vPd!CBg5FJb6-_1bEYOWEbQP3`b-r<0ZGXwE6vSdS1P zVn62<-DHx^V+ z136IBZ!;!4vP>O-l(V*t?J#MdRG5eR%P+g^KEL9{78Al3%W9W5U8ex$xbm<~GOs5! 
z0kzRY69Jo$<-k;L3UGEmEE#JEzhXbJ4X38Dt1WB*PPg>yk6y4bKFI~#>)%j zNI+y5*<=1_9k;O=S>n51@#L=_xC15t5_L;qVo7G}QrU#R$i#;QHE}q>g~*ZAQ7K<9 zje_tL(Lx|Rrdkh;`%Q0JAXW1Enc1sT75j?KkDP0e$UKA>U=r(#r8&sStngV-WnFqr ztFL=%!8}}xe~U)cCtZ?8(V`K6fK&+-vPpNV+Y&RLbT!#)YAV%-rbTI&mv(%bUPs-0Rmt@9-EJEgV^5$=m zYM@dYH&<6cH&;lELBvt3cEH-yc~#4yyn{y#qWD79zBai56k7XJgDb#R z5S(3Mkyb{-OD|fgmZ*n}c(<=o5mkkpZ6DQ>Xz6NU`a|aatuVyUno)T3*Yk&H8~4>h z#~d4X45A%s$bwO0#G!k4NHGwn_kFKiLg)?A%mY?`X%uN^D1~&ce0--d29tw%N}y0= z-1eq`bEnjG=k|xLFz30AvQS!J$AO&V2QeN~vq9al5Jx?*NUi3pkB0-khZ-1R1Mp{t zIP?m%d9KuUUpSl=uFryJED8&CE+p@aK)TrmytB1m<&Ve2y#i@@a$ZCFYpi} z*DcvkUUUJ@un+lXl(m>y*Wcx7;Fnz$ZgP!Jl%7#^>C0SF2lDxxhU{#`g-|CT;EM8{ zqz)on|7+ohbg}|yRX?Mn5bP!{{*G@%Wg`FZI55|7E>m6iQ3NSRpQtW^vP{q&;@vk%!5sa-Dm<&sY zotI=8EC|Lhfm&dnoq&z?Ywn0m?TgJWIxS_#ucHrHV^}vl2E+@ z_5J$teJ=$Rce@@`RQIVC;en(oLqU&*%jHdG(-TE{<*!9j8P#qUe|^4LqU_2x#L@ho z+H{ob#gImpmQnVdY;3pI6+??>Ml7u{5mb;WEXaOxl9#Si_@?l=8W7=M@ZL0ZeBn<0 zl&4>2T8%O}uIvKx`m%Yum)SfK`moE+@!CARC^ZREhdJAR`(@1LsI#^J^+F|la>xb> zdYfON!g?7z}02#W3L*;dd37cihdYHO~k1on2T&URJT_TG}f zm*+;z(MxDK>QpOiQU8qU0Fw)h&acU+xl%A|ja-|9x6AGC*8wrC?hqN^l3vyBAf@Ks znh&3;lXcm6TVMqkh4p;CrbAS{oD=4kxRB2PuPnNTQ7;k|B5A=8J`}mo$me`^j^}Z1 zpTQHjBpDmgRDcBpO@-M;xOko1>$tx$&bosm!-)gjYlwWYbe$rjHTBfZh3^)q^Y>r70Jj74{cSe5R{po0oA{^@|2mmCW z=4njcM&8?J-h;0CycI=7tld2_4)bWZeGGVU{Imu*VIK(LaN0h+S@D?OnIrpag%$^V z3s@keK#|2i{{^Ugw|Rsh*Bgo%lHxKbxn8ujT55|(egIgVQ#;d}eER10TF?FF8=wR) zuuK$2m;bOvrX|}OQHezGjauqd8+~##&^gHm<7T(mv8@sEU-A$^@vQGK9G~juSnjh}khBcRK752gH~kbdwR0!i<4gBouN^)a ze8|v|z{P|MBwqe=C1l_8~bNZ?3y4|Ox%hTkr)-P+7e_@7b!#}8TX z9rg%9_}7wyFV9=UVrQlJ+$F$??@Wd;u!m=>nEtFeKlIYfcW}+P#yyzeK8iE1Fp})K zumONkq61dPkG~91SnFzsRd-aD8izC=;#J3=1roRs)oM1kpc9{|y*=+pPwyBI(qho9 z38U+!o?OZt`lgw@xZ-Cc2~MCNO$jtib&moVqq!o(%`g&>_7APmbxv~6n>7_irWIs4 zLXiMd#nhNrOhw*PJ2Z?5v&8Gbv%zf-|) nXZY<5zn$TSnf28D_uK5p8AGwg+4v#WzYXYJHr7UI+TZ^V(QSQ- literal 0 HcmV?d00001 diff --git a/docs/operator-migration/images/post-operator-k8s.svg b/docs/operator-migration/images/post-operator-k8s.svg new file mode 
100644 index 00000000..20ebd449 --- /dev/null +++ b/docs/operator-migration/images/post-operator-k8s.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/operator-migration/images/pre-operator-infra.svg b/docs/operator-migration/images/pre-operator-infra.svg new file mode 100644 index 00000000..c1b474ea --- /dev/null +++ b/docs/operator-migration/images/pre-operator-infra.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/operator-migration/images/pre-operator-k8s.svg b/docs/operator-migration/images/pre-operator-k8s.svg new file mode 100644 index 00000000..93719397 --- /dev/null +++ b/docs/operator-migration/images/pre-operator-k8s.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/operator-migration/post-operator.tf.disabled b/docs/operator-migration/post-operator.tf.disabled new file mode 100644 index 00000000..62ae09e3 --- /dev/null +++ b/docs/operator-migration/post-operator.tf.disabled @@ -0,0 +1,113 @@ +provider "aws" { + region = "us-west-2" + + default_tags { + tags = { + GithubRepo = "terraform-aws-wandb" + GithubOrg = "wandb" + Enviroment = "Example" + Example = "PublicDnsExternal" + } + } +} + +terraform { + required_version = "~> 1.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.0" # Post-Operator + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.23" + } + } +} + +module "wandb_infra" { + source = "wandb/wandb/aws" + version = "4.7.2" + + namespace = var.namespace + public_access = true + external_dns = true + + enable_dummy_dns = var.enable_dummy_dns # Post-Operator + enable_operator_alb = var.enable_operator_alb # Post-Operator + deletion_protection = false + + database_instance_class = var.database_instance_class + database_engine_version = var.database_engine_version + database_snapshot_identifier = var.database_snapshot_identifier + database_sort_buffer_size = var.database_sort_buffer_size + + database_performance_insights_kms_key_arn = null + + 
allowed_inbound_cidr = var.allowed_inbound_cidr + allowed_inbound_ipv6_cidr = ["::/0"] + + eks_cluster_version = "1.25" + kubernetes_public_access = true + kubernetes_public_access_cidrs = ["0.0.0.0/0"] + + domain_name = var.domain_name + zone_id = var.zone_id + subdomain = var.subdomain + + # Add License Post-Operator + license = var.wandb_license + + # Use standard sizing Post-Operator + size = var.size + + # Set the External DNS Custom Domain Filter Post-Operator + custom_domain_filter = var.custom_domain_filter + + bucket_name = var.bucket_name + bucket_kms_key_arn = var.bucket_kms_key_arn + use_internal_queue = true + + aws_loadbalancer_controller_tags = var.aws_loadbalancer_controller_tags +} + +data "aws_eks_cluster" "app_cluster" { + name = module.wandb_infra.cluster_id +} + +data "aws_eks_cluster_auth" "app_cluster" { + name = module.wandb_infra.cluster_id +} + +provider "kubernetes" { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } +} + +# Enable the Helm provider +provider "helm" { + kubernetes { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } + } +} + +output "bucket_name" { + value = module.wandb_infra.bucket_name +} + +output "bucket_queue_name" { + value = module.wandb_infra.bucket_queue_name +} diff --git a/docs/operator-migration/post-operator.tfvars 
b/docs/operator-migration/post-operator.tfvars new file mode 100644 index 00000000..9c3ae571 --- /dev/null +++ b/docs/operator-migration/post-operator.tfvars @@ -0,0 +1,12 @@ +namespace = "operator-upgrade" +domain_name = "sandbox-aws.wandb.ml" +zone_id = "Z032246913CW32RVRY0WU" +subdomain = "operator-upgrade" +wandb_license = "eyJh" +# wandb_version = "0.51.2" Is now coming from the Release Channel or set in the User Spec. + +# Needed Operator Variables for Upgrade +size = "small" +enable_dummy_dns = true +enable_operator_alb = true +custom_domain_filter = "sandbox-aws.wandb.ml" \ No newline at end of file diff --git a/docs/operator-migration/pre-operator.tf b/docs/operator-migration/pre-operator.tf new file mode 100644 index 00000000..40bf5a2b --- /dev/null +++ b/docs/operator-migration/pre-operator.tf @@ -0,0 +1,112 @@ +provider "aws" { + region = "us-west-2" + + default_tags { + tags = { + GithubRepo = "terraform-aws-wandb" + GithubOrg = "wandb" + Enviroment = "Example" + Example = "PublicDnsExternal" + } + } +} + +terraform { + required_version = "~> 1.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 3.6" # Pre-Operator + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.23" + } + } +} + +module "wandb_infra" { + source = "wandb/wandb/aws" + version = "1.16.10" + + namespace = var.namespace + public_access = true + external_dns = true + + deletion_protection = false + + database_instance_class = var.database_instance_class + database_engine_version = var.database_engine_version + database_snapshot_identifier = var.database_snapshot_identifier + database_sort_buffer_size = var.database_sort_buffer_size + + database_performance_insights_kms_key_arn = null + + allowed_inbound_cidr = var.allowed_inbound_cidr + allowed_inbound_ipv6_cidr = ["::/0"] + + eks_cluster_version = "1.25" + kubernetes_public_access = true + kubernetes_public_access_cidrs = ["0.0.0.0/0"] + + domain_name = var.domain_name + zone_id = 
var.zone_id + subdomain = var.subdomain + + bucket_name = var.bucket_name + bucket_kms_key_arn = var.bucket_kms_key_arn + use_internal_queue = true +} + +data "aws_eks_cluster" "app_cluster" { + name = module.wandb_infra.cluster_id +} + +data "aws_eks_cluster_auth" "app_cluster" { + name = module.wandb_infra.cluster_id +} + +provider "kubernetes" { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } +} + +module "wandb_app" { + source = "wandb/wandb/kubernetes" + version = "1.12.0" + + license = var.wandb_license + + host = module.wandb_infra.url + bucket = "s3://${module.wandb_infra.bucket_name}" + bucket_aws_region = module.wandb_infra.bucket_region + bucket_queue = "internal://" + bucket_kms_key_arn = module.wandb_infra.kms_key_arn + database_connection_string = "mysql://${module.wandb_infra.database_connection_string}" + redis_connection_string = "redis://${module.wandb_infra.elasticache_connection_string}?tls=true&ttlInSeconds=604800" + + wandb_image = var.wandb_image + wandb_version = var.wandb_version + + service_port = module.wandb_infra.internal_app_port + + depends_on = [module.wandb_infra] + + other_wandb_env = merge({ + "GORILLA_CUSTOMER_SECRET_STORE_SOURCE" = "aws-secretmanager://${var.namespace}?namespace=${var.namespace}" + }, var.other_wandb_env) +} + +output "bucket_name" { + value = module.wandb_infra.bucket_name +} + +output "bucket_queue_name" { + value = module.wandb_infra.bucket_queue_name +} diff --git a/docs/operator-migration/pre-operator.tfvars b/docs/operator-migration/pre-operator.tfvars new file mode 100644 index 00000000..7cfb15a1 --- /dev/null +++ b/docs/operator-migration/pre-operator.tfvars @@ -0,0 +1,7 @@ 
+namespace = "operator-upgrade" +domain_name = "sandbox-aws.wandb.ml" +zone_id = "Z032246913CW32RVRY0WU" +subdomain = "operator-upgrade" +wandb_license = "eyJh" +wandb_version = "0.51.2" +# size = "small" \ No newline at end of file diff --git a/docs/operator-migration/readme.md b/docs/operator-migration/readme.md new file mode 100644 index 00000000..10179d04 --- /dev/null +++ b/docs/operator-migration/readme.md @@ -0,0 +1,267 @@ +# Operator Migration + +This guide details the steps required to upgrade from **_pre-operator_** to **_post-operator_** environments using the [terraform-aws-wandb](https://registry.terraform.io/modules/wandb/wandb/aws/latest) module. + +## Introduction to Operator Shift + +The transition to a Kubernetes [operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) pattern is crucial for our architecture. This section explains the transition from **_pre_** to **_post_** architectures. + +### Reasons for the Architecture Shift + +Historically, the W&B application was deployed as a single Deployment and pod within a Kubernetes Cluster or Docker container. We have always recommended externalizing the Metadata Store and Object Store to decouple state from the application, especially in production environments. + +As the application grew, the need to evolve from a monolithic container to a distributed system became apparent. This change facilitates backend logic handling and seamlessly introduces **_in-kubernetes_** infrastructure capabilities. It also supports deploying new services essential for additional features that W&B relies on. + +Previously, any Kubernetes-related changes required updating the [terraform-kubernetes-wandb](https://github.com/wandb/terraform-kubernetes-wandb), ensuring compatibility across cloud providers, configuring necessary Terraform variables, and executing a terraform apply for each backend or Kubernetes-level change. 
This process was not scalable and placed a significant burden on our support staff to assist customers with upgrades. + +The solution was to implement an **_Operator_** that connects to a central [deploy.wandb.ai](https://deploy.wandb.ai) server with its `license` to request the latest specification changes for a given **_Release Channel_** and apply them. Helm was chosen as both the deployment mechanism for our operator and the means for the operator to handle all configuration templating of the W&B Kubernetes stack; Helmception. + +You can install the operator from [charts/operator](https://github.com/wandb/helm-charts/tree/main/charts/operator). This installation creates a deployment called `controller-manager` and utilizes a **_Custom Resource_** definition named `weightsandbiases.apps.wandb.com` (shortName: `wandb`), which takes a single `spec` and applies it to the cluster: + +```yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: weightsandbiases.apps.wandb.com +``` + +The `controller-manager` installs [charts/operator-wandb](https://github.com/wandb/helm-charts/tree/main/charts/operator-wandb) based on the spec of the **_Custom Resource_**, **_Release Channel_**, and a **_User Defined Config_** in the new **_System Console_**. This hierarchy allows for maximum configuration flexibility at the user end and enables W&B to release new images, configurations, features, and Helm updates without requiring Terraform reruns. + +## Before and After Architecture + +Previously, our architecture used: + +```hcl +module "wandb_infra" { + source = "wandb/wandb/aws" + version = "1.16.10" + ... 
+} +``` + +to control the infrastructure: + +![pre-operator-infra](./images/pre-operator-infra.svg) + +and this module to deploy the W&B application: + +```hcl +module "wandb_app" { + source = "wandb/wandb/kubernetes" + version = "1.12.0" +} +``` + +![pre-operator-k8s](./images/pre-operator-k8s.svg) + +Post-transition, the architecture uses: + +```hcl +module "wandb_infra" { + source = "wandb/wandb/aws" + version = "4.7.2" + ... +} +``` + +to manage both the installation of infrastructure and the W&B application to the Kubernetes cluster, thus eliminating the need for the `module "wandb_app"` in `post-operator.tf`. + +![post-operator-k8s](./images/post-operator-k8s.svg) + +This architectural shift facilitates the introduction of additional customer features (like OpenTelemetry, Prometheus, HPAs, Kafka, and image updates) without requiring manual Terraform operations by SRE/Infrastructure teams. + +### Specification Hierarchy + +In our operator model, configuration specifications follow a hierarchical model where higher-level specifications override lower-level ones. Here’s how it works: + +- **Release Channel Spec**: This base-level configuration sets default values and configurations based on the **_Release Channel_** set by W&B for the deployment. +- **User Input Spec**: Users can override the default settings provided by the Release Channel Spec through the System Console. +- **Custom Resource Spec**: The highest level of specification, which comes from the Terraform configuration. Any values specified here will override both the User Input and Release Channel specifications. + +This hierarchical model ensures that configurations are flexible and customizable to meet varying needs while maintaining a manageable and systematic approach to upgrades and changes. 
+ +## Migration + +To commence with a base installation of the W&B Pre-Operator, ensure that `post-operator.tf` has a `.disabled` file extension and `pre-operator.tf` is active (i.e., does not have a `.disabled` extension). + +### Prerequisites + +Before initiating the migration process, ensure the following prerequisites are met: + +- **Egress**: The deployment can't be airgapped. It needs access to [deploy.wandb.ai](https://deploy.wandb.ai) to get the latest spec for the **_Release Channel_**. +- **AWS Credentials**: Proper AWS credentials configured to interact with your AWS resources. +- **Terraform Installed**: The latest version of Terraform should be installed on your system. +- **Route53 Hosted Zone**: An existing Route53 hosted zone corresponding to the domain under which the application will be served. +- **Pre-Operator Terraform Files**: Ensure `pre-operator.tf` and associated variable files like `pre-operator.tfvars` are correctly set up. + +### Pre-Operator Setup + +Execute the following Terraform commands to initialize and apply the configuration for the Pre-Operator setup: + +```bash +terraform init -upgrade +terraform apply -var-file=./pre-operator.tfvars +``` + +`pre-operator.tfvars` should look something like this: + +```ini +namespace = "operator-upgrade" +domain_name = "sandbox-aws.wandb.ml" +zone_id = "Z032246913CW32RVRY0WU" +subdomain = "operator-upgrade" +wandb_license = "ey..." +wandb_version = "0.51.2" +``` + +The `pre-operator.tf` configuration calls two modules: + +```hcl +module "wandb_infra" { + source = "wandb/wandb/aws" + version = "1.16.10" + ... +} +``` + +This module spins up the infrastructure. + +```hcl +module "wandb_app" { + source = "wandb/wandb/kubernetes" + version = "1.12.0" +} +``` + +This module deploys the application. + +### Post-Operator Setup + +Make sure that `pre-operator.tf` has a `.disabled` extension, and `post-operator.tf` is active. + +The `post-operator.tfvars` includes additional variables: + +```ini +... 
+# wandb_version = "0.51.2" is now managed via the Release Channel or set in the User Spec. + +# Required Operator Variables for Upgrade: +size = "small" +enable_dummy_dns = true +enable_operator_alb = true +custom_domain_filter = "sandbox-aws.wandb.ml" +``` + +Run the following commands to initialize and apply the Post-Operator configuration: + +```bash +terraform init -upgrade +terraform apply -var-file=./post-operator.tfvars +``` + +The plan and apply steps will update the following resources: + +```yaml +actions: + create: + - aws_efs_backup_policy.storage_class + - aws_efs_file_system.storage_class + - aws_efs_mount_target.storage_class["0"] + - aws_efs_mount_target.storage_class["1"] + - aws_eks_addon.efs + - aws_iam_openid_connect_provider.eks + - aws_iam_policy.secrets_manager + - aws_iam_role_policy_attachment.ebs_csi + - aws_iam_role_policy_attachment.eks_efs + - aws_iam_role_policy_attachment.node_secrets_manager + - aws_security_group.storage_class_nfs + - aws_security_group_rule.nfs_ingress + - random_pet.efs + - aws_s3_bucket_acl.file_storage + - aws_s3_bucket_cors_configuration.file_storage + - aws_s3_bucket_ownership_controls.file_storage + - aws_s3_bucket_server_side_encryption_configuration.file_storage + - helm_release.operator + - helm_release.wandb + - aws_cloudwatch_log_group.this[0] + - aws_iam_policy.default + - aws_iam_role.default + - aws_iam_role_policy_attachment.default + - helm_release.external_dns + - aws_default_network_acl.this[0] + - aws_default_route_table.default[0] + - aws_iam_policy.default + - aws_iam_role.default + - aws_iam_role_policy_attachment.default + - helm_release.aws_load_balancer_controller + + update_in_place: + - aws_iam_policy.node_IMDSv2 + - aws_iam_policy.node_cloudwatch + - aws_iam_policy.node_kms + - aws_iam_policy.node_s3 + - aws_iam_policy.node_sqs + - aws_eks_cluster.this[0] + - aws_elasticache_replication_group.default + - aws_rds_cluster.this[0] + - aws_rds_cluster_instance.this["1"] + - 
aws_default_security_group.this[0] + - aws_subnet.private[0] + - aws_subnet.private[1] + - aws_subnet.public[0] + - aws_subnet.public[1] + - aws_launch_template.workers["primary"] + + destroy: + - kubernetes_config_map.config_map + - kubernetes_deployment.wandb + - kubernetes_priority_class.priority + - kubernetes_secret.secret + - kubernetes_service.prometheus + - kubernetes_service.service + - random_id.snapshot_identifier[0] + + replace: + - aws_autoscaling_attachment.autoscaling_attachment["primary"] + - aws_route53_record.alb + - aws_eks_node_group.workers["primary"] +``` + +You should see something like this: + +![post-operator-apply](./images/post-operator-apply.png) + +Note that in `post-operator.tf`, there is a single: + +```hcl +module "wandb_infra" { + source = "wandb/wandb/aws" + version = "4.7.2" + ... +} +``` + +#### Changes in the Post-Operator Configuration: + +1. **Update Required Providers**: Change `required_providers.aws.version` from `3.6` to `4.0` for provider compatibility. +2. **DNS and Load Balancer Configuration**: Integrate `enable_dummy_dns` and `enable_operator_alb` to manage DNS records and AWS Load Balancer setup through an Ingress. +3. **License and Size Configuration**: Transfer the `license` and `size` parameters directly to the `wandb_infra` module to match new operational requirements. +4. **Custom Domain Handling**: If necessary, use `custom_domain_filter` to troubleshoot DNS issues by checking the External DNS pod logs within the `kube-system` namespace. +5. 
**Helm Provider Configuration**: Enable and configure the Helm provider to manage Kubernetes resources effectively: + +```hcl +provider "helm" { + kubernetes { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } + } +} +``` + +This comprehensive setup ensures a smooth transition from the Pre-Operator to the Post-Operator configuration, leveraging new efficiencies and capabilities enabled by the operator model. diff --git a/docs/operator-migration/variables.tf b/docs/operator-migration/variables.tf new file mode 100644 index 00000000..f7c3bd7e --- /dev/null +++ b/docs/operator-migration/variables.tf @@ -0,0 +1,145 @@ +variable "namespace" { + type = string + description = "Name prefix used for resources" +} + +variable "domain_name" { + type = string + description = "Domain name used to access instance." +} + +variable "zone_id" { + type = string + description = "Id of Route53 zone" +} + +variable "size" { + default = "small" + description = "Deployment size" + nullable = true + type = string +} + +variable "subdomain" { + type = string + default = null + description = "Subdomain for accessing the Weights & Biases UI." +} + +variable "wandb_license" { + type = string +} + +variable "database_engine_version" { + description = "Version for MySQL Auora" + type = string + default = "8.0.mysql_aurora.3.03.0" +} + +variable "database_instance_class" { + description = "Instance type to use by database master instance." + type = string + default = "db.r5.large" +} + +variable "database_snapshot_identifier" { + description = "Specifies whether or not to create this cluster from a snapshot. 
You can use either the name or ARN when specifying a DB cluster snapshot, or the ARN when specifying a DB snapshot" + type = string + default = null +} + +variable "database_sort_buffer_size" { + description = "Specifies the sort_buffer_size value to set for the database" + type = number + default = 262144 +} + +variable "wandb_version" { + description = "The version of Weights & Biases local to deploy." + type = string + default = "latest" +} + +variable "wandb_image" { + description = "Docker repository of to pull the wandb image from." + type = string + default = "wandb/local" +} + +variable "bucket_name" { + type = string + default = "" +} + +variable "bucket_kms_key_arn" { + type = string + description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted." + default = "" +} + +variable "enable_dummy_dns" { + type = bool + default = false + description = "Boolean indicating whether or not to enable dummy DNS for the old alb" +} + +variable "enable_operator_alb" { + type = bool + default = false + description = "Boolean indicating whether to use operatore ALB (true) or not (false)." +} + +variable "custom_domain_filter" { + description = "A custom domain filter to be used by external-dns instead of the default FQDN. If not set, the local FQDN is used." + type = string + default = null +} + +variable "allowed_inbound_cidr" { + default = ["0.0.0.0/0"] + nullable = false + type = list(string) +} + + +variable "allowed_inbound_ipv6_cidr" { + default = ["::/0"] + nullable = false + type = list(string) +} + +variable "other_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} + +variable "system_reserved_cpu_millicores" { + description = "(Optional) The amount of 'system-reserved' CPU millicores to pass to the kubelet. For example: 100. A value of -1 disables the flag." 
+ type = number + default = -1 +} + +variable "system_reserved_memory_megabytes" { + description = "(Optional) The amount of 'system-reserved' memory in megabytes to pass to the kubelet. For example: 100. A value of -1 disables the flag." + type = number + default = -1 +} + +variable "system_reserved_ephemeral_megabytes" { + description = "(Optional) The amount of 'system-reserved' ephemeral storage in megabytes to pass to the kubelet. For example: 1000. A value of -1 disables the flag." + type = number + default = -1 +} + +variable "system_reserved_pid" { + description = "(Optional) The amount of 'system-reserved' process ids [pid] to pass to the kubelet. For example: 1000. A value of -1 disables the flag." + type = number + default = -1 +} + +variable "aws_loadbalancer_controller_tags" { + description = "(Optional) A map of AWS tags to apply to all resources managed by the load balancer controller" + type = map(string) + default = {} +} From 12e053d520f6998689d3bec0352b320a9105ba9e Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Tue, 18 Jun 2024 00:16:31 +0530 Subject: [PATCH 22/72] feat: Added support yace (#218) * added yace and otel configs * fix * fixed --------- Co-authored-by: amanpruthi --- main.tf | 56 +++++++++++++++++++++++++++++++++++ modules/app_eks/outputs.tf | 4 +++ modules/iam_role/main.tf | 51 +++++++++++++++++++++++++++++++ modules/iam_role/outputs.tf | 3 ++ modules/iam_role/variables.tf | 8 +++++ variables.tf | 6 ++++ 6 files changed, 128 insertions(+) create mode 100644 modules/iam_role/main.tf create mode 100644 modules/iam_role/outputs.tf create mode 100644 modules/iam_role/variables.tf diff --git a/main.tf b/main.tf index 3b6fc78d..e551263e 100644 --- a/main.tf +++ b/main.tf @@ -222,6 +222,15 @@ locals { lb_name_truncated = "${substr(var.namespace, 0, local.max_lb_name_length)}-alb-k8s" } +data "aws_region" "current" {} + +module "iam_role" { + count = var.enable_yace ? 
1 : 0 + source = "./modules/iam_role" + namespace = var.namespace + aws_iam_openid_connect_provider_url = module.app_eks.aws_iam_openid_connect_provider +} + module "wandb" { source = "wandb/wandb/helm" version = "1.2.0" @@ -300,6 +309,53 @@ module "wandb" { }, var.app_wandb_env) } + # To support otel rds and redis metrics need operator-wandb chart minimum version 0.13.8 ( yace subchart) + yace = var.enable_yace ? { + install = true + regions = [data.aws_region.current.name] + serviceAccount = { annotations = { "eks.amazonaws.com/role-arn" = module.iam_role[0].role_arn} } + } : { + install = false + regions = [] + serviceAccount = {} + } + + otel = { + daemonset = var.enable_yace ? { + config = { + receivers = { + prometheus = { + config = { + scrape_configs = [ + { job_name = "yace" + scheme = "http" + metrics_path = "/metrics" + dns_sd_configs = [ + { names = ["yace"] + type = "A" + port = 5000 + } + ] + } + ] + } + } + } + service = { + pipelines = { + metrics = { + receivers = ["hostmetrics", "k8s_cluster", "kubeletstats", "prometheus"] + } + } + } + } + } : { config = { + receivers = {} + service = {} + } + } + } + mysql = { install = false } redis = { install = false } diff --git a/modules/app_eks/outputs.tf b/modules/app_eks/outputs.tf index 304b51db..cc791455 100644 --- a/modules/app_eks/outputs.tf +++ b/modules/app_eks/outputs.tf @@ -17,3 +17,7 @@ output "node_role" { output "primary_workers_security_group_id" { value = aws_security_group.primary_workers.id } + +output "aws_iam_openid_connect_provider" { + value = aws_iam_openid_connect_provider.eks.url +} \ No newline at end of file diff --git a/modules/iam_role/main.tf b/modules/iam_role/main.tf new file mode 100644 index 00000000..5d982e56 --- /dev/null +++ b/modules/iam_role/main.tf @@ -0,0 +1,51 @@ +data "aws_caller_identity" "current" {} + +resource "aws_iam_role" "irsa" { + name = "${var.namespace}-yace-irsa-role" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Sid 
= "" + Effect = "Allow" + Principal = { + Federated = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${var.aws_iam_openid_connect_provider_url}" + } + Action = ["sts:AssumeRoleWithWebIdentity"] + Condition = { + StringLike = { + "${var.aws_iam_openid_connect_provider_url}:sub" = "system:serviceaccount:*:yace" + "${var.aws_iam_openid_connect_provider_url}:aud" = "sts.amazonaws.com" + } + } + } + ] + }) +} + + +resource "aws_iam_policy" "irsa" { + name = "${var.namespace}-yace-irsa-policy" + description = "IRSA IAM Policy" + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "tag:GetResources", + "cloudwatch:GetMetricData", + "cloudwatch:GetMetricStatistics", + "cloudwatch:ListMetrics" + ] + Resource = "*" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "default" { + role = aws_iam_role.irsa.name + policy_arn = aws_iam_policy.irsa.arn +} \ No newline at end of file diff --git a/modules/iam_role/outputs.tf b/modules/iam_role/outputs.tf new file mode 100644 index 00000000..989f8c0c --- /dev/null +++ b/modules/iam_role/outputs.tf @@ -0,0 +1,3 @@ +output "role_arn" { + value = aws_iam_role.irsa.arn +} \ No newline at end of file diff --git a/modules/iam_role/variables.tf b/modules/iam_role/variables.tf new file mode 100644 index 00000000..4cd10397 --- /dev/null +++ b/modules/iam_role/variables.tf @@ -0,0 +1,8 @@ +variable "namespace" { + type = string + description = "The name prefix for all resources created." 
+} + +variable "aws_iam_openid_connect_provider_url" { + type = string +} \ No newline at end of file diff --git a/variables.tf b/variables.tf index 067fd9f5..bf75219d 100644 --- a/variables.tf +++ b/variables.tf @@ -441,3 +441,9 @@ variable "parquet_wandb_env" { description = "Extra environment variables for W&B" default = {} } + +variable "enable_yace" { + type = bool + description = "deploy yet another cloudwatch exporter to fetch aws resources metrics" + default = true +} \ No newline at end of file From 6e2932ed1871e77a8469a1f96739e40f18bbec7a Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 17 Jun 2024 18:47:01 +0000 Subject: [PATCH 23/72] chore(release): version 4.12.0 [skip ci] ## [4.12.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.11.0...v4.12.0) (2024-06-17) ### Features * Added support yace ([#218](https://github.com/wandb/terraform-aws-wandb/issues/218)) ([12e053d](https://github.com/wandb/terraform-aws-wandb/commit/12e053d520f6998689d3bec0352b320a9105ba9e)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df212ea5..f68813fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.12.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.11.0...v4.12.0) (2024-06-17) + + +### Features + +* Added support yace ([#218](https://github.com/wandb/terraform-aws-wandb/issues/218)) ([12e053d](https://github.com/wandb/terraform-aws-wandb/commit/12e053d520f6998689d3bec0352b320a9105ba9e)) + ## [4.11.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.10.2...v4.11.0) (2024-05-18) From be152e59e7ed8406fad8cb6ab0a9e6ec31497bd1 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Mon, 17 Jun 2024 12:57:56 -0700 Subject: [PATCH 24/72] chore: Replace deprecated resolve conflicts field (#230) --- modules/app_eks/add-ons.tf | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index 52d50e4a..b4a7456e 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -32,14 +32,15 @@ resource "aws_iam_role" "oidc" { ### add-ons resource "aws_eks_addon" "aws_efs_csi_driver" { - depends_on = [ - aws_eks_addon.vpc_cni - ] - cluster_name = var.namespace - addon_name = "aws-efs-csi-driver" - addon_version = "v1.7.7-eksbuild.1" - resolve_conflicts = "OVERWRITE" - } + depends_on = [ + aws_eks_addon.vpc_cni + ] + cluster_name = var.namespace + addon_name = "aws-efs-csi-driver" + addon_version = "v1.7.7-eksbuild.1" + resolve_conflicts_on_create = "OVERWRITE" + resolve_conflicts_on_update = "OVERWRITE" +} resource "aws_eks_addon" "aws_ebs_csi_driver" { depends_on = [ @@ -48,7 +49,8 @@ resource "aws_eks_addon" "aws_ebs_csi_driver" { cluster_name = var.namespace addon_name = "aws-ebs-csi-driver" addon_version = "v1.25.0-eksbuild.1" - resolve_conflicts = "OVERWRITE" + resolve_conflicts_on_create = "OVERWRITE" + resolve_conflicts_on_update = "OVERWRITE" } resource "aws_eks_addon" "coredns" { @@ -58,7 +60,8 @@ resource "aws_eks_addon" "coredns" { cluster_name = var.namespace addon_name = 
"coredns" addon_version = "v1.9.3-eksbuild.11" - resolve_conflicts = "OVERWRITE" + resolve_conflicts_on_create = "OVERWRITE" + resolve_conflicts_on_update = "OVERWRITE" } resource "aws_eks_addon" "kube_proxy" { @@ -68,13 +71,15 @@ resource "aws_eks_addon" "kube_proxy" { cluster_name = var.namespace addon_name = "kube-proxy" addon_version = "v1.25.14-eksbuild.2" - resolve_conflicts = "OVERWRITE" + resolve_conflicts_on_create = "OVERWRITE" + resolve_conflicts_on_update = "OVERWRITE" } resource "aws_eks_addon" "vpc_cni" { cluster_name = var.namespace addon_name = "vpc-cni" addon_version = "v1.18.0-eksbuild.1" - resolve_conflicts = "OVERWRITE" - service_account_role_arn = aws_iam_role.oidc.arn -} \ No newline at end of file + resolve_conflicts_on_create = "OVERWRITE" + resolve_conflicts_on_update = "OVERWRITE" + service_account_role_arn = aws_iam_role.oidc.arn +} From 974b4f3ec0d01b34cf6d83008c9fe2a0d3d8ee7a Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:12:21 -0700 Subject: [PATCH 25/72] fix: Remove white space (#231) --- modules/app_eks/add-ons.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index b4a7456e..69b2ff49 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -1,4 +1,3 @@ - ### IAM policy and role for vpc-cni data "aws_iam_policy_document" "oidc_assume_role" { statement { From 9dbd6125ffd51ed6219e1a02b1fddc3732971030 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 17 Jun 2024 20:12:52 +0000 Subject: [PATCH 26/72] chore(release): version 4.12.1 [skip ci] ### [4.12.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.0...v4.12.1) (2024-06-17) ### Bug Fixes * Remove white space ([#231](https://github.com/wandb/terraform-aws-wandb/issues/231)) ([974b4f3](https://github.com/wandb/terraform-aws-wandb/commit/974b4f3ec0d01b34cf6d83008c9fe2a0d3d8ee7a)) --- CHANGELOG.md | 7 +++++++ 1 
file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f68813fb..0d50b62e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.12.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.0...v4.12.1) (2024-06-17) + + +### Bug Fixes + +* Remove white space ([#231](https://github.com/wandb/terraform-aws-wandb/issues/231)) ([974b4f3](https://github.com/wandb/terraform-aws-wandb/commit/974b4f3ec0d01b34cf6d83008c9fe2a0d3d8ee7a)) + ## [4.12.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.11.0...v4.12.0) (2024-06-17) From 778f147aa9962fde6a74b7d35501ec7dd7abf2a9 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Mon, 17 Jun 2024 15:01:37 -0700 Subject: [PATCH 27/72] fix: Revert resolve conflicts var (#233) --- modules/app_eks/add-ons.tf | 47 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index 69b2ff49..41c9cc8f 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -34,51 +34,46 @@ resource "aws_eks_addon" "aws_efs_csi_driver" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - addon_name = "aws-efs-csi-driver" - addon_version = "v1.7.7-eksbuild.1" - resolve_conflicts_on_create = "OVERWRITE" - resolve_conflicts_on_update = "OVERWRITE" + cluster_name = var.namespace + addon_name = "aws-efs-csi-driver" + addon_version = "v1.7.7-eksbuild.1" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "aws_ebs_csi_driver" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - addon_name = "aws-ebs-csi-driver" - addon_version = "v1.25.0-eksbuild.1" - resolve_conflicts_on_create = "OVERWRITE" - resolve_conflicts_on_update = "OVERWRITE" + cluster_name = var.namespace + addon_name = "aws-ebs-csi-driver" + addon_version = 
"v1.25.0-eksbuild.1" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "coredns" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - addon_name = "coredns" - addon_version = "v1.9.3-eksbuild.11" - resolve_conflicts_on_create = "OVERWRITE" - resolve_conflicts_on_update = "OVERWRITE" + cluster_name = var.namespace + addon_name = "coredns" + addon_version = "v1.9.3-eksbuild.11" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "kube_proxy" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - addon_name = "kube-proxy" - addon_version = "v1.25.14-eksbuild.2" - resolve_conflicts_on_create = "OVERWRITE" - resolve_conflicts_on_update = "OVERWRITE" + cluster_name = var.namespace + addon_name = "kube-proxy" + addon_version = "v1.25.14-eksbuild.2" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "vpc_cni" { - cluster_name = var.namespace - addon_name = "vpc-cni" - addon_version = "v1.18.0-eksbuild.1" - resolve_conflicts_on_create = "OVERWRITE" - resolve_conflicts_on_update = "OVERWRITE" - service_account_role_arn = aws_iam_role.oidc.arn + cluster_name = var.namespace + addon_name = "vpc-cni" + addon_version = "v1.18.0-eksbuild.1" + resolve_conflicts = "OVERWRITE" + service_account_role_arn = aws_iam_role.oidc.arn } From 5f83533f17994f32fbe8a30172f23ba37ec6d5b4 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 17 Jun 2024 22:02:07 +0000 Subject: [PATCH 28/72] chore(release): version 4.12.2 [skip ci] ### [4.12.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.1...v4.12.2) (2024-06-17) ### Bug Fixes * Revert resolve conflicts var ([#233](https://github.com/wandb/terraform-aws-wandb/issues/233)) ([778f147](https://github.com/wandb/terraform-aws-wandb/commit/778f147aa9962fde6a74b7d35501ec7dd7abf2a9)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d50b62e..9734eb7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 
+2,13 @@ All notable changes to this project will be documented in this file. +### [4.12.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.1...v4.12.2) (2024-06-17) + + +### Bug Fixes + +* Revert resolve conflicts var ([#233](https://github.com/wandb/terraform-aws-wandb/issues/233)) ([778f147](https://github.com/wandb/terraform-aws-wandb/commit/778f147aa9962fde6a74b7d35501ec7dd7abf2a9)) + ### [4.12.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.0...v4.12.1) (2024-06-17) From bb7b99e95595324c79e1bfaafd2f76d1241b1a8a Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Fri, 21 Jun 2024 14:00:38 +0530 Subject: [PATCH 29/72] feat: Upgraded eks version 1.26 to 1.27 (#224) Upgraded eks cluster version from 1.26 to 1.27 --- modules/app_eks/add-ons.tf | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index 41c9cc8f..e3a5ae81 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -31,49 +31,49 @@ resource "aws_iam_role" "oidc" { ### add-ons resource "aws_eks_addon" "aws_efs_csi_driver" { - depends_on = [ - aws_eks_addon.vpc_cni - ] - cluster_name = var.namespace - addon_name = "aws-efs-csi-driver" - addon_version = "v1.7.7-eksbuild.1" - resolve_conflicts = "OVERWRITE" + depends_on = [ + aws_eks_addon.vpc_cni + ] + cluster_name = var.namespace + addon_name = "aws-efs-csi-driver" + addon_version = "v2.0.4-eksbuild.1" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "aws_ebs_csi_driver" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - addon_name = "aws-ebs-csi-driver" - addon_version = "v1.25.0-eksbuild.1" - resolve_conflicts = "OVERWRITE" + cluster_name = var.namespace + addon_name = "aws-ebs-csi-driver" + addon_version = "v1.31.0-eksbuild.1" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "coredns" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - 
addon_name = "coredns" - addon_version = "v1.9.3-eksbuild.11" - resolve_conflicts = "OVERWRITE" + cluster_name = var.namespace + addon_name = "coredns" + addon_version = "v1.10.1-eksbuild.11" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "kube_proxy" { depends_on = [ aws_eks_addon.vpc_cni ] - cluster_name = var.namespace - addon_name = "kube-proxy" - addon_version = "v1.25.14-eksbuild.2" - resolve_conflicts = "OVERWRITE" + cluster_name = var.namespace + addon_name = "kube-proxy" + addon_version = "v1.27.12-eksbuild.5" + resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "vpc_cni" { cluster_name = var.namespace addon_name = "vpc-cni" - addon_version = "v1.18.0-eksbuild.1" + addon_version = "v1.18.2-eksbuild.1" resolve_conflicts = "OVERWRITE" service_account_role_arn = aws_iam_role.oidc.arn } From 925199817ff1dda4e4212f70753ed70d469ab4fa Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 21 Jun 2024 08:31:24 +0000 Subject: [PATCH 30/72] chore(release): version 4.13.0 [skip ci] ## [4.13.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.2...v4.13.0) (2024-06-21) ### Features * Upgraded eks version 1.26 to 1.27 ([#224](https://github.com/wandb/terraform-aws-wandb/issues/224)) ([bb7b99e](https://github.com/wandb/terraform-aws-wandb/commit/bb7b99e95595324c79e1bfaafd2f76d1241b1a8a)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9734eb7e..682235e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.13.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.2...v4.13.0) (2024-06-21) + + +### Features + +* Upgraded eks version 1.26 to 1.27 ([#224](https://github.com/wandb/terraform-aws-wandb/issues/224)) ([bb7b99e](https://github.com/wandb/terraform-aws-wandb/commit/bb7b99e95595324c79e1bfaafd2f76d1241b1a8a)) + ### [4.12.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.1...v4.12.2) (2024-06-17) From 4ebda4985a0d31df757598c9b3447b6d310e40f8 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Fri, 21 Jun 2024 17:48:36 +0530 Subject: [PATCH 31/72] feat: Added support s3 endpoints (#202) Added support s3 gateway endpoints --- data.tf | 4 +++- main.tf | 9 +++++++++ modules/endpoint/main.tf | 21 +++++++++++++++++++++ modules/endpoint/outputs.tf | 0 modules/endpoint/variables.tf | 14 ++++++++++++++ 5 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 modules/endpoint/main.tf create mode 100644 modules/endpoint/outputs.tf create mode 100644 modules/endpoint/variables.tf diff --git a/data.tf b/data.tf index 9e8f4603..21e6e2d0 100644 --- a/data.tf +++ b/data.tf @@ -7,4 +7,6 @@ data "aws_sqs_queue" "file_storage" { count = local.use_internal_queue ? 0 : 1 depends_on = [module.file_storage] name = local.bucket_queue_name -} \ No newline at end of file +} + +data "aws_region" "current" { } \ No newline at end of file diff --git a/main.tf b/main.tf index e551263e..97122925 100644 --- a/main.tf +++ b/main.tf @@ -55,6 +55,15 @@ locals { network_database_subnet_group_name = var.create_vpc ? module.networking.database_subnet_group_name : "${var.namespace}-database-subnet" } +module "s3_endpoint" { + count = length(var.private_link_allowed_account_ids) > 0 ? 
1 : 0 + source = "./modules/endpoint" + service_name = "com.amazonaws.${data.aws_region.current.name}.s3" + network_id = local.network_id + private_route_table_id = module.networking.private_route_table_ids + depends_on = [module.networking] +} + module "database" { source = "./modules/database" diff --git a/modules/endpoint/main.tf b/modules/endpoint/main.tf new file mode 100644 index 00000000..dc64c71c --- /dev/null +++ b/modules/endpoint/main.tf @@ -0,0 +1,21 @@ +resource "aws_vpc_endpoint" "default" { + vpc_id = var.network_id + service_name = var.service_name + vpc_endpoint_type = "Gateway" + auto_accept = true + route_table_ids = var.private_route_table_id + + policy = < Date: Fri, 21 Jun 2024 12:19:04 +0000 Subject: [PATCH 32/72] chore(release): version 4.14.0 [skip ci] ## [4.14.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.13.0...v4.14.0) (2024-06-21) ### Features * Added support s3 endpoints ([#202](https://github.com/wandb/terraform-aws-wandb/issues/202)) ([4ebda49](https://github.com/wandb/terraform-aws-wandb/commit/4ebda4985a0d31df757598c9b3447b6d310e40f8)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 682235e2..aacd3e40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.14.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.13.0...v4.14.0) (2024-06-21) + + +### Features + +* Added support s3 endpoints ([#202](https://github.com/wandb/terraform-aws-wandb/issues/202)) ([4ebda49](https://github.com/wandb/terraform-aws-wandb/commit/4ebda4985a0d31df757598c9b3447b6d310e40f8)) + ## [4.13.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.12.2...v4.13.0) (2024-06-21) From 4d24df5d85df731c78801e2d625cf16e9d8bc5d3 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Mon, 24 Jun 2024 14:51:10 +0530 Subject: [PATCH 33/72] feat: Upgraded eks version from 1.27 to 1.28 (#226) Added add-on version for eks 1.28 --- modules/app_eks/add-ons.tf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index e3a5ae81..56503d6c 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -29,7 +29,8 @@ resource "aws_iam_role" "oidc" { -### add-ons +### add-ons for eks version 1.28 + resource "aws_eks_addon" "aws_efs_csi_driver" { depends_on = [ aws_eks_addon.vpc_cni @@ -66,14 +67,14 @@ resource "aws_eks_addon" "kube_proxy" { ] cluster_name = var.namespace addon_name = "kube-proxy" - addon_version = "v1.27.12-eksbuild.5" + addon_version = "v1.28.8-eksbuild.5" resolve_conflicts = "OVERWRITE" } resource "aws_eks_addon" "vpc_cni" { cluster_name = var.namespace addon_name = "vpc-cni" - addon_version = "v1.18.2-eksbuild.1" + addon_version = "v1.18.0-eksbuild.1" resolve_conflicts = "OVERWRITE" service_account_role_arn = aws_iam_role.oidc.arn } From c813759a3dbb7be28c423756cd2bcd50108ea47e Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 24 Jun 2024 09:21:39 +0000 Subject: [PATCH 34/72] chore(release): version 4.15.0 [skip ci] ## [4.15.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.14.0...v4.15.0) (2024-06-24) ### Features * Upgraded eks version from 1.27 to 1.28 ([#226](https://github.com/wandb/terraform-aws-wandb/issues/226)) 
([4d24df5](https://github.com/wandb/terraform-aws-wandb/commit/4d24df5d85df731c78801e2d625cf16e9d8bc5d3)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aacd3e40..698aa19e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.15.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.14.0...v4.15.0) (2024-06-24) + + +### Features + +* Upgraded eks version from 1.27 to 1.28 ([#226](https://github.com/wandb/terraform-aws-wandb/issues/226)) ([4d24df5](https://github.com/wandb/terraform-aws-wandb/commit/4d24df5d85df731c78801e2d625cf16e9d8bc5d3)) + ## [4.14.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.13.0...v4.14.0) (2024-06-21) From 1e758122e9cb0df34aa2e4ded1368bce5be75278 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Mon, 24 Jun 2024 15:37:54 +0530 Subject: [PATCH 35/72] feat: Added private-only traffic feature (#192) Added private-only traffic feature --- main.tf | 30 ++++++++++-------- modules/app_lb/main.tf | 51 +++++++++++++++++++++++++++++++ modules/app_lb/outputs.tf | 8 +++++ modules/app_lb/variables.tf | 11 +++++++ modules/private_link/main.tf | 6 +++- modules/private_link/variables.tf | 7 +++++ variables.tf | 13 ++++++++ 7 files changed, 112 insertions(+), 14 deletions(-) diff --git a/main.tf b/main.tf index 97122925..b1e1491b 100644 --- a/main.tf +++ b/main.tf @@ -174,27 +174,31 @@ module "app_lb" { fqdn = local.full_fqdn extra_fqdn = local.extra_fqdn - allowed_inbound_cidr = var.allowed_inbound_cidr - allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr - target_port = local.internal_app_port + allowed_inbound_cidr = var.allowed_inbound_cidr + allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr + target_port = local.internal_app_port + network_id = local.network_id + network_private_subnets = local.network_private_subnets + network_public_subnets = local.network_public_subnets + 
enable_private_only_traffic = var.private_only_traffic + private_endpoint_cidr = var.allowed_private_endpoint_cidr - network_id = local.network_id - network_private_subnets = local.network_private_subnets - network_public_subnets = local.network_public_subnets } module "private_link" { count = length(var.private_link_allowed_account_ids) > 0 ? 1 : 0 source = "./modules/private_link" - namespace = var.namespace - allowed_account_ids = var.private_link_allowed_account_ids - deletion_protection = var.deletion_protection - network_private_subnets = local.network_private_subnets - alb_name = local.lb_name_truncated - vpc_id = local.network_id - + namespace = var.namespace + allowed_account_ids = var.private_link_allowed_account_ids + deletion_protection = var.deletion_protection + network_private_subnets = local.network_private_subnets + alb_name = local.lb_name_truncated + vpc_id = local.network_id + enable_private_only_traffic = var.private_only_traffic + nlb_security_group = module.app_lb.nlb_security_group depends_on = [ + module.app_lb, module.wandb ] } diff --git a/modules/app_lb/main.tf b/modules/app_lb/main.tf index 464c6256..7a52be36 100644 --- a/modules/app_lb/main.tf +++ b/modules/app_lb/main.tf @@ -3,6 +3,36 @@ locals { https_port = 443 } +resource "aws_security_group" "inbound_private" { + count = var.enable_private_only_traffic ? 
1 : 0 + name = "${var.namespace}-nlb-inbound" + description = "Allow http(s) inbound traffic from private endpoint to wandb" + vpc_id = var.network_id + + dynamic "ingress" { + for_each = var.private_endpoint_cidr + content { + from_port = local.https_port + to_port = local.https_port + protocol = "tcp" + description = "Allow HTTPS (port ${local.https_port}) traffic inbound to W&B LB" + cidr_blocks = [ingress.value] + } + } + + dynamic "ingress" { + for_each = var.private_endpoint_cidr + content { + from_port = local.http_port + to_port = local.http_port + protocol = "tcp" + description = "Allow HTTP (port ${local.http_port}) traffic inbound to W&B LB" + cidr_blocks = [ingress.value] + } + } +} + + resource "aws_security_group" "inbound" { name = "${var.namespace}-alb-inbound" description = "Allow http(s) traffic to wandb" @@ -27,6 +57,27 @@ resource "aws_security_group" "inbound" { } } +resource "aws_security_group_rule" "alb_http_traffic" { + count = var.enable_private_only_traffic ? 1 : 0 + type = "ingress" + from_port = local.http_port + to_port = local.http_port + protocol = "tcp" + security_group_id = aws_security_group.inbound.id + source_security_group_id = aws_security_group.inbound_private[0].id +} + +resource "aws_security_group_rule" "alb_https_traffic" { + count = var.enable_private_only_traffic ? 
1 : 0 + type = "ingress" + from_port = local.https_port + to_port = local.https_port + protocol = "tcp" + security_group_id = aws_security_group.inbound.id + source_security_group_id = aws_security_group.inbound_private[0].id +} + + resource "aws_security_group" "outbound" { name = "${var.namespace}-alb-outbound" vpc_id = var.network_id diff --git a/modules/app_lb/outputs.tf b/modules/app_lb/outputs.tf index 9f3900bc..20724c32 100644 --- a/modules/app_lb/outputs.tf +++ b/modules/app_lb/outputs.tf @@ -12,4 +12,12 @@ output "lb_arn" { output "tg_app_arn" { value = aws_lb_target_group.app.arn +} + +output "alb_name" { +value = aws_lb.alb.arn +} + +output "nlb_security_group" { + value = var.enable_private_only_traffic? aws_security_group.inbound_private[0].id : null } \ No newline at end of file diff --git a/modules/app_lb/variables.tf b/modules/app_lb/variables.tf index df0837f0..bc1ab76d 100644 --- a/modules/app_lb/variables.tf +++ b/modules/app_lb/variables.tf @@ -73,4 +73,15 @@ variable "network_public_subnets" { variable "target_port" { type = number default = 32543 +} + + +variable "private_endpoint_cidr" { + description = "List of CIDR blocks allowed to access the wandb-server" + type = list(string) +} + +variable "enable_private_only_traffic" { + description = "Boolean flag to create sg" + type = bool } \ No newline at end of file diff --git a/modules/private_link/main.tf b/modules/private_link/main.tf index b0097437..bb2989c0 100644 --- a/modules/private_link/main.tf +++ b/modules/private_link/main.tf @@ -1,6 +1,6 @@ locals { max_lb_name_length = 32 - length("-nlb") - lb_name_truncated = "${substr(var.namespace, 0, local.max_lb_name_length)}-nlb" + lb_name_truncated = var.enable_private_only_traffic ? 
"${substr(var.namespace, 0, local.max_lb_name_length)}-private-link-nlb" : "${substr(var.namespace, 0, local.max_lb_name_length)}-nlb" } resource "aws_lb" "nlb" { @@ -9,6 +9,10 @@ resource "aws_lb" "nlb" { load_balancer_type = "network" subnets = var.network_private_subnets enable_deletion_protection = var.deletion_protection + security_groups = var.enable_private_only_traffic ? [var.nlb_security_group] : [] +lifecycle { + create_before_destroy = true +} } resource "aws_lb_target_group" "nlb" { diff --git a/modules/private_link/variables.tf b/modules/private_link/variables.tf index 8ba3e5e2..a5524f2d 100644 --- a/modules/private_link/variables.tf +++ b/modules/private_link/variables.tf @@ -27,3 +27,10 @@ variable "vpc_id" { description = "ID of the VPC to create the VPC Endpoint Service in" type = string } + +variable "enable_private_only_traffic" { + type = bool +} +variable "nlb_security_group" { + type = string +} \ No newline at end of file diff --git a/variables.tf b/variables.tf index bf75219d..eecebe74 100644 --- a/variables.tf +++ b/variables.tf @@ -267,6 +267,19 @@ variable "private_link_allowed_account_ids" { default = [] } +variable "allowed_private_endpoint_cidr" { + description = "Private CIDRs allowed to access wandb-server." 
+ nullable = false + type = list(string) + default = [] +} + +variable "private_only_traffic" { + description = "Enable private only traffic from customer private network" + type = bool + default = false +} + ########################################## # EKS Cluster # ########################################## From 8a34f5833676f5c59b20b4ecc71c9cd8c4f50ff5 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 24 Jun 2024 10:08:24 +0000 Subject: [PATCH 36/72] chore(release): version 4.16.0 [skip ci] ## [4.16.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.15.0...v4.16.0) (2024-06-24) ### Features * Added private-only traffic feature ([#192](https://github.com/wandb/terraform-aws-wandb/issues/192)) ([1e75812](https://github.com/wandb/terraform-aws-wandb/commit/1e758122e9cb0df34aa2e4ded1368bce5be75278)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 698aa19e..25cc5484 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.16.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.15.0...v4.16.0) (2024-06-24) + + +### Features + +* Added private-only traffic feature ([#192](https://github.com/wandb/terraform-aws-wandb/issues/192)) ([1e75812](https://github.com/wandb/terraform-aws-wandb/commit/1e758122e9cb0df34aa2e4ded1368bce5be75278)) + ## [4.15.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.14.0...v4.15.0) (2024-06-24) From 8d290b83f654483823783e8562f9e378172a38a3 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Wed, 26 Jun 2024 14:58:48 +0530 Subject: [PATCH 37/72] feat: Fixed yace service account issue (#234) Added yace service account name variable --- main.tf | 76 ++++++++++++++++++++++++++++------- modules/iam_role/main.tf | 2 +- modules/iam_role/variables.tf | 4 ++ variables.tf | 5 +++ 4 files changed, 71 insertions(+), 16 deletions(-) diff --git a/main.tf b/main.tf index b1e1491b..dfd091ec 100644 --- a/main.tf +++ b/main.tf @@ -14,14 +14,14 @@ locals { } module "file_storage" { - count = var.create_bucket ? 1 : 0 - source = "./modules/file_storage" - - create_queue = !local.use_internal_queue + count = var.create_bucket ? 
1 : 0 + source = "./modules/file_storage" + + create_queue = !local.use_internal_queue deletion_protection = var.deletion_protection - kms_key_arn = local.kms_key_arn - namespace = var.namespace - sse_algorithm = "aws:kms" + kms_key_arn = local.kms_key_arn + namespace = var.namespace + sse_algorithm = "aws:kms" } locals { @@ -172,8 +172,8 @@ module "app_lb" { acm_certificate_arn = local.acm_certificate_arn zone_id = var.zone_id - fqdn = local.full_fqdn - extra_fqdn = local.extra_fqdn + fqdn = local.full_fqdn + extra_fqdn = local.extra_fqdn allowed_inbound_cidr = var.allowed_inbound_cidr allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr target_port = local.internal_app_port @@ -235,13 +235,12 @@ locals { lb_name_truncated = "${substr(var.namespace, 0, local.max_lb_name_length)}-alb-k8s" } -data "aws_region" "current" {} - module "iam_role" { - count = var.enable_yace ? 1 : 0 - source = "./modules/iam_role" - namespace = var.namespace - aws_iam_openid_connect_provider_url = module.app_eks.aws_iam_openid_connect_provider + count = var.enable_yace ? 1 : 0 + source = "./modules/iam_role" + yace_sa_name = var.yace_sa_name + namespace = var.namespace + aws_iam_openid_connect_provider_url = module.app_eks.aws_iam_openid_connect_provider } module "wandb" { @@ -322,6 +321,53 @@ module "wandb" { }, var.app_wandb_env) } + # To support otel rds and redis metrics need operator-wandb chart minimum version 0.13.8 ( yace subchart) + yace = var.enable_yace ? { + install = true + regions = [data.aws_region.current.name] + serviceAccount = { annotations = { "eks.amazonaws.com/role-arn" = module.iam_role[0].role_arn } } + } : { + install = false + regions = [] + serviceAccount = {} + } + + otel = { + daemonset = var.enable_yace ? 
{ + config = { + receivers = { + prometheus = { + config = { + scrape_configs = [ + { job_name = "yace" + scheme = "http" + metrics_path = "/metrics" + dns_sd_configs = [ + { names = ["wandb-yace"] + type = "A" + port = 5000 + } + ] + } + ] + } + } + } + service = { + pipelines = { + metrics = { + receivers = ["hostmetrics", "k8s_cluster", "kubeletstats", "prometheus"] + } + } + } + } + } : { config = { + receivers = {} + service = {} + } + } + } + # To support otel rds and redis metrics need operator-wandb chart minimum version 0.13.8 ( yace subchart) yace = var.enable_yace ? { install = true diff --git a/modules/iam_role/main.tf b/modules/iam_role/main.tf index 5d982e56..68005c22 100644 --- a/modules/iam_role/main.tf +++ b/modules/iam_role/main.tf @@ -14,7 +14,7 @@ resource "aws_iam_role" "irsa" { Action = ["sts:AssumeRoleWithWebIdentity"] Condition = { StringLike = { - "${var.aws_iam_openid_connect_provider_url}:sub" = "system:serviceaccount:*:yace" + "${var.aws_iam_openid_connect_provider_url}:sub" = "system:serviceaccount:default:${var.yace_sa_name}" "${var.aws_iam_openid_connect_provider_url}:aud" = "sts.amazonaws.com" } } diff --git a/modules/iam_role/variables.tf b/modules/iam_role/variables.tf index 4cd10397..d14948e5 100644 --- a/modules/iam_role/variables.tf +++ b/modules/iam_role/variables.tf @@ -5,4 +5,8 @@ variable "namespace" { variable "aws_iam_openid_connect_provider_url" { type = string +} + +variable "yace_sa_name" { + type = string } \ No newline at end of file diff --git a/variables.tf b/variables.tf index eecebe74..2f9aacc6 100644 --- a/variables.tf +++ b/variables.tf @@ -459,4 +459,9 @@ variable "enable_yace" { type = bool description = "deploy yet another cloudwatch exporter to fetch aws resources metrics" default = true +} + +variable "yace_sa_name" { + type = string + default = "wandb-yace" } \ No newline at end of file From 8d62fe3b7663c236c38dc36a1d4e6c1549e834db Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 26 Jun 2024 
09:29:15 +0000 Subject: [PATCH 38/72] chore(release): version 4.17.0 [skip ci] ## [4.17.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.16.0...v4.17.0) (2024-06-26) ### Features * Fixed yace service account issue ([#234](https://github.com/wandb/terraform-aws-wandb/issues/234)) ([8d290b8](https://github.com/wandb/terraform-aws-wandb/commit/8d290b83f654483823783e8562f9e378172a38a3)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25cc5484..54f51b1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.17.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.16.0...v4.17.0) (2024-06-26) + + +### Features + +* Fixed yace service account issue ([#234](https://github.com/wandb/terraform-aws-wandb/issues/234)) ([8d290b8](https://github.com/wandb/terraform-aws-wandb/commit/8d290b83f654483823783e8562f9e378172a38a3)) + ## [4.16.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.15.0...v4.16.0) (2024-06-24) From 65411c2488ee8c9edd744e6e6cc4e203487dea7f Mon Sep 17 00:00:00 2001 From: Aastha Gupta <71313011+velotioaastha@users.noreply.github.com> Date: Mon, 8 Jul 2024 02:57:23 -0400 Subject: [PATCH 39/72] feat: Add example tf files for custom vpc, sql, redis, eks (#208) * add example tf files for custom vpc, sql, redis * add standard examples tf * add standard & custom examples tf --------- Co-authored-by: Aastha Gupta --- examples/byo-vpc-eks-sql-redis/main.tf | 258 ++++++++++ examples/byo-vpc-eks-sql-redis/variables.tf | 494 ++++++++++++++++++++ examples/byo-vpc-sql/main.tf | 364 +++++++++++++++ examples/byo-vpc-sql/variables.tf | 459 ++++++++++++++++++ examples/standard/main.tf | 91 ++++ examples/standard/variables.tf | 97 ++++ 6 files changed, 1763 insertions(+) create mode 100644 examples/byo-vpc-eks-sql-redis/main.tf create mode 100644 examples/byo-vpc-eks-sql-redis/variables.tf create mode 100644 
examples/byo-vpc-sql/main.tf create mode 100644 examples/byo-vpc-sql/variables.tf create mode 100644 examples/standard/main.tf create mode 100644 examples/standard/variables.tf diff --git a/examples/byo-vpc-eks-sql-redis/main.tf b/examples/byo-vpc-eks-sql-redis/main.tf new file mode 100644 index 00000000..c8e51ecf --- /dev/null +++ b/examples/byo-vpc-eks-sql-redis/main.tf @@ -0,0 +1,258 @@ +provider "aws" { + region = "us-east-1" + + + default_tags { + tags = { + GithubRepo = "terraform-aws-wandb" + GithubOrg = "wandb" + Enviroment = "Example" + Example = "BYO-VPC-EKS-SQL-REDIS" + } + } +} +data "aws_s3_bucket" "file_storage" { + depends_on = [module.file_storage] + bucket = local.bucket_name +} + +data "aws_sqs_queue" "file_storage" { + count = local.use_internal_queue ? 0 : 1 + depends_on = [module.file_storage] + name = local.bucket_queue_name +} + +data "aws_eks_cluster" "app_cluster" { + name = var.eks_cluster_name +} + +data "aws_eks_cluster_auth" "app_cluster" { + name = var.eks_cluster_name +} + +provider "kubernetes" { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } +} + +provider "helm" { + kubernetes { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } + } +} + +locals { + kms_key_arn = var.kms_key_arn + use_external_bucket = var.bucket_name != "" + use_internal_queue = local.use_external_bucket || 
var.use_internal_queue +} + +module "file_storage" { + count = var.create_bucket ? 1 : 0 + source = "../../modules/file_storage" + + create_queue = !local.use_internal_queue + deletion_protection = var.deletion_protection + kms_key_arn = local.kms_key_arn + namespace = var.namespace + sse_algorithm = "aws:kms" +} + +locals { + bucket_name = local.use_external_bucket ? var.bucket_name : module.file_storage.0.bucket_name + bucket_queue_name = local.use_internal_queue ? null : module.file_storage.0.bucket_queue_name +} + +locals { + network_id = var.network_id + network_public_subnets = var.network_public_subnets + network_private_subnets = var.network_private_subnets +} + +locals { + create_certificate = var.public_access && var.acm_certificate_arn == null + + fqdn = var.subdomain == null ? var.domain_name : "${var.subdomain}.${var.domain_name}" +} + +# Create SSL Certificate if applicable +module "acm" { + source = "terraform-aws-modules/acm/aws" + version = "~> 3.0" + + create_certificate = local.create_certificate + + subject_alternative_names = var.extra_fqdn + + domain_name = var.external_dns ? local.fqdn : var.domain_name + zone_id = var.zone_id + + wait_for_validation = true +} + +locals { + acm_certificate_arn = local.create_certificate ? module.acm.acm_certificate_arn : var.acm_certificate_arn + url = local.acm_certificate_arn == null ? "http://${local.fqdn}" : "https://${local.fqdn}" + domain_filter = var.custom_domain_filter == null || var.custom_domain_filter == "" ? local.fqdn : var.custom_domain_filter + internal_app_port = 32543 +} + +locals { + full_fqdn = var.enable_dummy_dns ? "old.${local.fqdn}" : local.fqdn + extra_fqdn = var.enable_dummy_dns ? [for fqdn in var.extra_fqdn : "old.${fqdn}"] : var.extra_fqdn +} + +module "app_lb" { + source = "../../modules/app_lb" + + namespace = var.namespace + load_balancing_scheme = var.public_access ? 
"PUBLIC" : "PRIVATE" + acm_certificate_arn = local.acm_certificate_arn + zone_id = var.zone_id + + fqdn = local.full_fqdn + extra_fqdn = local.extra_fqdn + allowed_inbound_cidr = var.allowed_inbound_cidr + allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr + target_port = local.internal_app_port + + network_id = local.network_id + network_private_subnets = local.network_private_subnets + network_public_subnets = local.network_public_subnets +} + +module "private_link" { + count = length(var.private_link_allowed_account_ids) > 0 ? 1 : 0 + source = "../../modules/private_link" + + namespace = var.namespace + allowed_account_ids = var.private_link_allowed_account_ids + deletion_protection = var.deletion_protection + network_private_subnets = local.network_private_subnets + alb_name = local.lb_name_truncated + vpc_id = local.network_id + + depends_on = [ + module.wandb + ] +} + +resource "aws_autoscaling_attachment" "autoscaling_attachment" { + for_each = var.autoscaling_group_names + autoscaling_group_name = each.value + lb_target_group_arn = module.app_lb.tg_app_arn +} + +locals { + max_lb_name_length = 32 - length("-alb-k8s") + lb_name_truncated = "${substr(var.namespace, 0, local.max_lb_name_length)}-alb-k8s" +} + +module "wandb" { + source = "wandb/wandb/helm" + version = "1.2.0" + + depends_on = [ + module.app_lb, + ] + operator_chart_version = "1.1.2" + controller_image_tag = "1.10.1" + + spec = { + values = { + global = { + host = local.url + license = var.license + + extraEnv = var.other_wandb_env + + bucket = { + provider = "s3" + name = local.bucket_name + region = data.aws_s3_bucket.file_storage.region + kmsKey = local.use_external_bucket ? 
var.bucket_kms_key_arn : local.kms_key_arn + } + + mysql = { + host = var.database_endpoint + password = var.database_master_password + user = var.database_master_username + database = var.database_name + port = var.database_port + } + + redis = { + host = var.redis_host + port = "${var.redis_port}?tls=true&ttlInSeconds=604800" + } + } + + ingress = { + class = "alb" + + additionalHosts = concat(var.extra_fqdn, length(var.private_link_allowed_account_ids) > 0 ? [""] : []) + + annotations = merge({ + "alb.ingress.kubernetes.io/load-balancer-name" = local.lb_name_truncated + "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF + ${join("\\,", var.allowed_inbound_cidr)} + EOF + "external-dns.alpha.kubernetes.io/ingress-hostname-source" = "annotation-only" + "alb.ingress.kubernetes.io/scheme" = var.kubernetes_alb_internet_facing ? "internet-facing" : "internal" + "alb.ingress.kubernetes.io/target-type" = "ip" + "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" + "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn + }, + length(var.extra_fqdn) > 0 && var.enable_dummy_dns ? { + "external-dns.alpha.kubernetes.io/hostname" = <<-EOF + ${local.fqdn}\,${join("\\,", var.extra_fqdn)}\,${local.fqdn} + EOF + } : { + "external-dns.alpha.kubernetes.io/hostname" = var.enable_operator_alb ? local.fqdn : "" + }, + length(var.kubernetes_alb_subnets) > 0 ? { + "alb.ingress.kubernetes.io/subnets" = <<-EOF + ${join("\\,", var.kubernetes_alb_subnets)} + EOF + } : {}) + + } + + app = var.enable_operator_alb ? 
{} : { + extraEnv = merge({ + "GORILLA_GLUE_LIST" = "true" + }, var.app_wandb_env) + } + + mysql = { install = false } + redis = { install = false } + + weave = { + persistence = { + provider = "efs" + efs = { + fileSystemId = var.efs_id + } + } + extraEnv = var.weave_wandb_env + } + + parquet = { + extraEnv = var.parquet_wandb_env + } + } + } +} diff --git a/examples/byo-vpc-eks-sql-redis/variables.tf b/examples/byo-vpc-eks-sql-redis/variables.tf new file mode 100644 index 00000000..2c8ff617 --- /dev/null +++ b/examples/byo-vpc-eks-sql-redis/variables.tf @@ -0,0 +1,494 @@ +########################################## +# Common # +########################################## +variable "namespace" { + type = string + description = "String used for prefix resources." +} + +variable "deletion_protection" { + description = "If the instance should have deletion protection enabled. The database / S3 can't be deleted when this value is set to `true`." + type = bool + default = false +} + +variable "use_internal_queue" { + type = bool + default = false +} + +variable "size" { + default = null + description = "Deployment size" + nullable = true + type = string +} + +########################################## +# Database # +########################################## +variable "database_engine_version" { + description = "Version for MySQL Auora" + type = string + default = "8.0.mysql_aurora.3.05.2" +} + +variable "database_instance_class" { + description = "Instance type to use by database master instance." + type = string + default = "db.r5.large" +} + +variable "database_snapshot_identifier" { + description = "Specifies whether or not to create this cluster from a snapshot. 
You can use either the name or ARN when specifying a DB cluster snapshot, or the ARN when specifying a DB snapshot" + type = string + default = null +} + +variable "database_sort_buffer_size" { + description = "Specifies the sort_buffer_size value to set for the database" + type = number + default = 67108864 +} + +variable "database_name" { + description = "Specifies the name of the database" + type = string + default = "wandb_local" +} + +variable "database_port" { + description = "Specifies the port of the database" + type = string + default = "3306" +} + +variable "database_master_username" { + description = "Specifies the master_username value to set for the database" + type = string + default = "wandb" +} + +variable "database_master_password" { + description = "Specifies the master_password value to set for the database" + type = string + sensitive = true +} + +variable "database_endpoint" { + description = "Specifies the endpoint value to set for the database" + type = string +} + +variable "database_binlog_format" { + description = "Specifies the binlog_format value to set for the database" + type = string + default = "ROW" +} + +variable "database_innodb_lru_scan_depth" { + description = "Specifies the innodb_lru_scan_depth value to set for the database" + type = number + default = 128 +} + +variable "database_performance_insights_kms_key_arn" { + default = null + description = "Specifies an existing KMS key ARN to encrypt the performance insights data if performance_insights_enabled is was enabled out of band" + nullable = true + type = string +} + +variable "database_security_group_id" { + description = "Specifies the security group id value to set for the database" + type = string +} + +########################################## +# DNS # +########################################## +variable "public_access" { + type = bool + default = false + description = "Is this instance accessable a public domain." 
+} + +variable "external_dns" { + type = bool + default = false + description = "Using external DNS. A `subdomain` must also be specified if this value is true." +} + +variable "custom_domain_filter" { + description = "A custom domain filter to be used by external-dns instead of the default FQDN. If not set, the local FQDN is used." + type = string + default = null +} + +# Sometimes the domain name and zone name don't match, so let's explicitly ask for +# both. It also just makes life easier to have both, even though in most cases it may +# be redundant info. +# https://github.com/hashicorp/terraform-aws-terraform-enterprise/pull/41#issuecomment-563501858 +variable "zone_id" { + type = string + description = "Domain for creating the Weights & Biases subdomain on." +} + +variable "domain_name" { + type = string + description = "Domain for accessing the Weights & Biases UI." +} + +variable "subdomain" { + type = string + default = null + description = "Subdomain for accessing the Weights & Biases UI. Default creates record at Route53 Route." +} + +variable "enable_dummy_dns" { + type = bool + default = false + description = "Boolean indicating whether or not to enable dummy DNS for the old alb" +} + + +variable "enable_operator_alb" { + type = bool + default = false + description = "Boolean indicating whether to use operatore ALB (true) or not (false)." +} + +variable "extra_fqdn" { + type = list(string) + description = "Additional fqdn's must be in the same hosted zone as `domain_name`." + default = [] +} + +########################################## +# Load Balancer # +########################################## +variable "ssl_policy" { + type = string + default = "ELBSecurityPolicy-FS-1-2-Res-2020-10" + description = "SSL policy to use on ALB listener" +} + +variable "acm_certificate_arn" { + type = string + default = null + description = "The ARN of an existing ACM certificate." +} + +variable "allowed_inbound_cidr" { + description = "CIDRs allowed to access wandb-server."
+ nullable = false + type = list(string) +} + +variable "allowed_inbound_ipv6_cidr" { + description = "CIDRs allowed to access wandb-server." + nullable = false + type = list(string) +} + + +########################################## +# KMS # +########################################## +variable "kms_key_alias" { + type = string + description = "KMS key alias for AWS KMS Customer managed key." + default = null +} + +variable "kms_key_deletion_window" { + type = number + description = "Duration in days to destroy the key after it is deleted. Must be between 7 and 30 days." + default = 7 +} + +variable "kms_key_policy" { + type = string + description = "The policy that will define the permissions for the kms key." + default = "" +} + +variable "kms_key_arn" { + type = string + description = "KMS key arn for AWS KMS Customer managed key." +} + +########################################## +# Network # +########################################## +variable "create_vpc" { + type = bool + description = "Boolean indicating whether to deploy a VPC (true) or not (false)." + default = false +} + +variable "network_id" { + description = "The identity of the VPC in which resources will be deployed." + type = string +} + +variable "network_private_subnets" { + description = "A list of the identities of the private subnetworks in which resources will be deployed." + type = list(string) +} + +variable "network_public_subnets" { + description = "A list of the identities of the public subnetworks in which resources will be deployed." + type = list(string) +} + +variable "network_database_subnets" { + description = "A list of the identities of the database subnetworks in which resources will be deployed." + type = list(string) +} + +variable "network_elasticache_subnets" { + description = "A list of the identities of the subnetworks in which elasticache resources will be deployed." 
+ type = list(string) +} + +variable "network_cidr" { + type = string + description = "CIDR block for VPC." + default = "10.10.0.0/16" +} + +variable "network_public_subnet_cidrs" { + type = list(string) + description = "List of public subnet CIDR ranges to create in VPC." + default = ["10.10.0.0/24", "10.10.1.0/24"] +} + +variable "network_private_subnet_cidrs" { + type = list(string) + description = "List of private subnet CIDR ranges to create in VPC." + default = ["10.10.10.0/24", "10.10.11.0/24"] +} + +variable "network_database_subnet_cidrs" { + type = list(string) + description = "List of database subnet CIDR ranges to create in VPC." + default = ["10.10.20.0/24", "10.10.21.0/24"] +} + +variable "network_elasticache_subnet_cidrs" { + type = list(string) + description = "List of elasticache subnet CIDR ranges to create in VPC." + default = ["10.10.30.0/24", "10.10.31.0/24"] +} + +variable "private_link_allowed_account_ids" { + description = "List of AWS account IDs allowed to access the VPC Endpoint Service" + type = list(string) + default = [] +} + +########################################## +# EKS Cluster # +########################################## +variable "eks_cluster_name" { + description = "EKS cluster kubernetes name" + nullable = false + type = string +} + +variable "eks_cluster_version" { + description = "EKS cluster kubernetes version" + nullable = false + type = string +} + +variable "efs_id" { + description = "EFS id" + type = string +} + +variable "autoscaling_group_names" { + type = map(string) + default = { + "primary" : "" + } +} + +variable "kubernetes_alb_internet_facing" { + type = bool + description = "Indicates whether or not the ALB controlled by the Amazon ALB ingress controller is internet-facing or internal." + default = true +} + +variable "kubernetes_alb_subnets" { + type = list(string) + description = "List of subnet ID's the ALB will use for ingress traffic."
+ default = [] +} + +variable "kubernetes_public_access" { + type = bool + description = "Indicates whether or not the Amazon EKS public API server endpoint is enabled." + default = true +} + + +variable "kubernetes_public_access_cidrs" { + description = "List of CIDR blocks which can access the Amazon EKS public API server endpoint." + type = list(string) + default = [] +} + +variable "kubernetes_map_accounts" { + description = "Additional AWS account numbers to add to the aws-auth configmap." + type = list(string) + default = [] +} + +variable "kubernetes_map_roles" { + description = "Additional IAM roles to add to the aws-auth configmap." + type = list(object({ + rolearn = string + username = string + groups = list(string) + })) + default = [] +} + +variable "kubernetes_map_users" { + description = "Additional IAM users to add to the aws-auth configmap." + type = list(object({ + userarn = string + username = string + groups = list(string) + })) + default = [] +} + +variable "kubernetes_instance_types" { + description = "EC2 Instance type for primary node group." + type = list(string) + default = ["m5.large"] +} + +variable "kubernetes_node_count" { + description = "Number of nodes" + type = number + default = 2 +} + +variable "eks_policy_arns" { + type = list(string) + description = "Additional IAM policy to apply to the EKS cluster" + default = [] +} + +variable "system_reserved_cpu_millicores" { + description = "(Optional) The amount of 'system-reserved' CPU millicores to pass to the kubelet. For example: 100. A value of -1 disables the flag." + type = number + default = 70 +} + +variable "system_reserved_memory_megabytes" { + description = "(Optional) The amount of 'system-reserved' memory in megabytes to pass to the kubelet. For example: 100. A value of -1 disables the flag." 
+ type = number + default = 100 +} + +variable "system_reserved_ephemeral_megabytes" { + description = "(Optional) The amount of 'system-reserved' ephemeral storage in megabytes to pass to the kubelet. For example: 1000. A value of -1 disables the flag." + type = number + default = 750 +} + +variable "system_reserved_pid" { + description = "(Optional) The amount of 'system-reserved' process ids [pid] to pass to the kubelet. For example: 1000. A value of -1 disables the flag." + type = number + default = 500 +} + +variable "aws_loadbalancer_controller_tags" { + description = "(Optional) A map of AWS tags to apply to all resources managed by the load balancer controller" + type = map(string) + default = {} +} + +########################################## +# External Bucket # +########################################## +# Most users will not need these settings. They are meant for users who want a +# bucket and SQS queue that are in a different account. +variable "create_bucket" { + type = bool + default = true +} + +variable "bucket_name" { + type = string + default = "" +} + +variable "bucket_kms_key_arn" { + type = string + description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted." + default = "" +} + +########################################## +# Redis # +########################################## +variable "create_elasticache" { + type = bool + description = "Boolean indicating whether to provision an elasticache instance (true) or not (false)."
+ default = false +} + +variable "elasticache_node_type" { + description = "The type of the redis cache node to deploy" + type = string + default = "cache.t2.medium" +} + +variable "redis_port" { + description = "Redis port" + type = string + default = "6379" +} + +variable "redis_host" { + description = "Redis host" + type = string +} + +# ########################################## +# # Weights & Biases # +# ########################################## +variable "license" { + type = string + description = "Weights & Biases license key." +} + +variable "other_wandb_env" { + type = map(any) + description = "Extra environment variables for W&B" + default = {} +} + +variable "weave_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} + +variable "app_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} + +variable "parquet_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} diff --git a/examples/byo-vpc-sql/main.tf b/examples/byo-vpc-sql/main.tf new file mode 100644 index 00000000..55fbaf5c --- /dev/null +++ b/examples/byo-vpc-sql/main.tf @@ -0,0 +1,364 @@ +provider "aws" { + region = "us-east-1" + + + default_tags { + tags = { + GithubRepo = "terraform-aws-wandb" + GithubOrg = "wandb" + Enviroment = "Example" + Example = "BYO-VPC-SQL" + } + } +} +data "aws_s3_bucket" "file_storage" { + depends_on = [module.file_storage] + bucket = local.bucket_name +} + +data "aws_sqs_queue" "file_storage" { + count = local.use_internal_queue ? 
0 : 1 + depends_on = [module.file_storage] + name = local.bucket_queue_name +} + +data "aws_eks_cluster" "app_cluster" { + name = module.app_eks.cluster_id +} + +data "aws_eks_cluster_auth" "app_cluster" { + name = module.app_eks.cluster_id +} + +provider "kubernetes" { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } +} + +provider "helm" { + kubernetes { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } + } +} + +module "kms" { + source = "../../modules/kms" + + key_alias = var.kms_key_alias == null ? 
"${var.namespace}-kms-alias" : var.kms_key_alias + key_deletion_window = var.kms_key_deletion_window + + key_policy = var.kms_key_policy +} + +locals { + kms_key_arn = module.kms.key.arn + use_external_bucket = var.bucket_name != "" + use_internal_queue = local.use_external_bucket || var.use_internal_queue + deployment_size = { + small = { + db = "db.r6g.large", + node_count = 3, + node_instance = "r6i.xlarge" + cache = "cache.m6g.large" + }, + medium = { + db = "db.r6g.xlarge", + node_count = 3, + node_instance = "r6i.xlarge" + cache = "cache.m6g.large" + }, + large = { + db = "db.r6g.2xlarge", + node_count = 3, + node_instance = "r6i.2xlarge" + cache = "cache.m6g.xlarge" + }, + xlarge = { + db = "db.r6g.4xlarge", + node_count = 3, + node_instance = "r6i.2xlarge" + cache = "cache.m6g.xlarge" + }, + xxlarge = { + db = "db.r6g.8xlarge", + node_count = 3, + node_instance = "r6i.4xlarge" + cache = "cache.m6g.2xlarge" + } + } +} + +module "file_storage" { + count = var.create_bucket ? 1 : 0 + source = "../../modules/file_storage" + + create_queue = !local.use_internal_queue + deletion_protection = var.deletion_protection + kms_key_arn = local.kms_key_arn + namespace = var.namespace + sse_algorithm = "aws:kms" +} + +locals { + bucket_name = local.use_external_bucket ? var.bucket_name : module.file_storage.0.bucket_name + bucket_queue_name = local.use_internal_queue ? null : module.file_storage.0.bucket_queue_name +} + +locals { + network_id = var.network_id + network_public_subnets = var.network_public_subnets + network_private_subnets = var.network_private_subnets + network_private_subnet_cidrs = var.network_private_subnet_cidrs +} + +locals { + create_certificate = var.public_access && var.acm_certificate_arn == null + + fqdn = var.subdomain == null ? 
var.domain_name : "${var.subdomain}.${var.domain_name}" + } + +# Create SSL certificate if applicable +module "acm" { + source = "terraform-aws-modules/acm/aws" + version = "~> 3.0" + + create_certificate = local.create_certificate + + subject_alternative_names = var.extra_fqdn + + domain_name = var.external_dns ? local.fqdn : var.domain_name + zone_id = var.zone_id + + wait_for_validation = true +} + +locals { + acm_certificate_arn = local.create_certificate ? module.acm.acm_certificate_arn : var.acm_certificate_arn + url = local.acm_certificate_arn == null ? "http://${local.fqdn}" : "https://${local.fqdn}" + domain_filter = var.custom_domain_filter == null || var.custom_domain_filter == "" ? local.fqdn : var.custom_domain_filter + + internal_app_port = 32543 +} + +module "app_eks" { + source = "../../modules/app_eks" + + fqdn = local.domain_filter + + namespace = var.namespace + kms_key_arn = local.kms_key_arn + + instance_types = try([local.deployment_size[var.size].node_instance], var.kubernetes_instance_types) + desired_capacity = try(local.deployment_size[var.size].node_count, var.kubernetes_node_count) + map_accounts = var.kubernetes_map_accounts + map_roles = var.kubernetes_map_roles + map_users = var.kubernetes_map_users + + bucket_kms_key_arn = local.use_external_bucket ? var.bucket_kms_key_arn : local.kms_key_arn + bucket_arn = data.aws_s3_bucket.file_storage.arn + bucket_sqs_queue_arn = local.use_internal_queue ? null : data.aws_sqs_queue.file_storage.0.arn + + network_id = local.network_id + network_private_subnets = local.network_private_subnets + + lb_security_group_inbound_id = module.app_lb.security_group_inbound_id + database_security_group_id = var.database_security_group_id + + create_elasticache_security_group = var.create_elasticache + elasticache_security_group_id = var.create_elasticache ?
module.redis.0.security_group_id : null + + cluster_version = var.eks_cluster_version + cluster_endpoint_public_access = var.kubernetes_public_access + cluster_endpoint_public_access_cidrs = var.kubernetes_public_access_cidrs + + eks_policy_arns = var.eks_policy_arns + + system_reserved_cpu_millicores = var.system_reserved_cpu_millicores + system_reserved_memory_megabytes = var.system_reserved_memory_megabytes + system_reserved_ephemeral_megabytes = var.system_reserved_ephemeral_megabytes + system_reserved_pid = var.system_reserved_pid + + aws_loadbalancer_controller_tags = var.aws_loadbalancer_controller_tags +} + +locals { + full_fqdn = var.enable_dummy_dns ? "old.${local.fqdn}" : local.fqdn + extra_fqdn = var.enable_dummy_dns ? [for fqdn in var.extra_fqdn : "old.${fqdn}"] : var.extra_fqdn +} + +module "app_lb" { + source = "../../modules/app_lb" + + namespace = var.namespace + load_balancing_scheme = var.public_access ? "PUBLIC" : "PRIVATE" + acm_certificate_arn = local.acm_certificate_arn + zone_id = var.zone_id + + fqdn = local.full_fqdn + extra_fqdn = local.extra_fqdn + allowed_inbound_cidr = var.allowed_inbound_cidr + allowed_inbound_ipv6_cidr = var.allowed_inbound_ipv6_cidr + target_port = local.internal_app_port + + network_id = local.network_id + network_private_subnets = local.network_private_subnets + network_public_subnets = local.network_public_subnets +} + +module "private_link" { + count = length(var.private_link_allowed_account_ids) > 0 ? 
1 : 0 + source = "../../modules/private_link" + + namespace = var.namespace + allowed_account_ids = var.private_link_allowed_account_ids + deletion_protection = var.deletion_protection + network_private_subnets = local.network_private_subnets + alb_name = local.lb_name_truncated + vpc_id = local.network_id + + depends_on = [ + module.wandb + ] +} + +resource "aws_autoscaling_attachment" "autoscaling_attachment" { + for_each = module.app_eks.autoscaling_group_names + autoscaling_group_name = each.value + lb_target_group_arn = module.app_lb.tg_app_arn +} + +locals { + network_elasticache_subnets = var.network_elasticache_subnets + network_elasticache_subnet_cidrs = var.network_elasticache_subnet_cidrs + network_elasticache_create_subnet_group = true + network_elasticache_subnet_group_name = "${var.namespace}-elasticache-subnet" +} + +module "redis" { + count = var.create_elasticache ? 1 : 0 + redis_create_subnet_group = local.network_elasticache_create_subnet_group + redis_subnets = local.network_elasticache_subnets + source = "../../modules/redis" + namespace = var.namespace + + vpc_id = local.network_id + redis_subnet_group_name = local.network_elasticache_subnet_group_name + vpc_subnets_cidr_blocks = local.network_elasticache_subnet_cidrs + node_type = try(local.deployment_size[var.size].cache, var.elasticache_node_type) + kms_key_arn = local.kms_key_arn +} + +locals { + max_lb_name_length = 32 - length("-alb-k8s") + lb_name_truncated = "${substr(var.namespace, 0, local.max_lb_name_length)}-alb-k8s" +} + +module "wandb" { + source = "wandb/wandb/helm" + version = "1.2.0" + + depends_on = [ + module.app_eks, + module.redis, + ] + operator_chart_version = "1.1.2" + controller_image_tag = "1.10.1" + + spec = { + values = { + global = { + host = local.url + license = var.license + + extraEnv = var.other_wandb_env + + bucket = { + provider = "s3" + name = local.bucket_name + region = data.aws_s3_bucket.file_storage.region + kmsKey = local.use_external_bucket ? 
var.bucket_kms_key_arn : local.kms_key_arn + } + + mysql = { + host = var.database_endpoint + password = var.database_master_password + user = var.database_master_username + database = var.database_name + port = var.database_port + } + + redis = { + host = module.redis.0.host + port = "${module.redis.0.port}?tls=true&ttlInSeconds=604800" + } + } + + ingress = { + class = "alb" + + additionalHosts = concat(var.extra_fqdn, length(var.private_link_allowed_account_ids) > 0 ? [""] : []) + + annotations = merge({ + "alb.ingress.kubernetes.io/load-balancer-name" = local.lb_name_truncated + "alb.ingress.kubernetes.io/inbound-cidrs" = <<-EOF + ${join("\\,", var.allowed_inbound_cidr)} + EOF + "external-dns.alpha.kubernetes.io/ingress-hostname-source" = "annotation-only" + "alb.ingress.kubernetes.io/scheme" = var.kubernetes_alb_internet_facing ? "internet-facing" : "internal" + "alb.ingress.kubernetes.io/target-type" = "ip" + "alb.ingress.kubernetes.io/listen-ports" = "[{\\\"HTTPS\\\": 443}]" + "alb.ingress.kubernetes.io/certificate-arn" = local.acm_certificate_arn + }, + length(var.extra_fqdn) > 0 && var.enable_dummy_dns ? { + "external-dns.alpha.kubernetes.io/hostname" = <<-EOF + ${local.fqdn}\,${join("\\,", var.extra_fqdn)}\,${local.fqdn} + EOF + } : { + "external-dns.alpha.kubernetes.io/hostname" = var.enable_operator_alb ? local.fqdn : "" + }, + length(var.kubernetes_alb_subnets) > 0 ? { + "alb.ingress.kubernetes.io/subnets" = <<-EOF + ${join("\\,", var.kubernetes_alb_subnets)} + EOF + } : {}) + + } + + app = var.enable_operator_alb ? 
{} : { + extraEnv = merge({ + "GORILLA_GLUE_LIST" = "true" + }, var.app_wandb_env) + } + + mysql = { install = false } + redis = { install = false } + + weave = { + persistence = { + provider = "efs" + efs = { + fileSystemId = module.app_eks.efs_id + } + } + extraEnv = var.weave_wandb_env + } + + parquet = { + extraEnv = var.parquet_wandb_env + } + } + } +} diff --git a/examples/byo-vpc-sql/variables.tf b/examples/byo-vpc-sql/variables.tf new file mode 100644 index 00000000..aadff404 --- /dev/null +++ b/examples/byo-vpc-sql/variables.tf @@ -0,0 +1,459 @@ +########################################## +# Common # +########################################## +variable "namespace" { + type = string + description = "String used for prefix resources." +} + +variable "deletion_protection" { + description = "If the instance should have deletion protection enabled. The database / S3 can't be deleted when this value is set to `true`." + type = bool + default = true +} + +variable "use_internal_queue" { + type = bool + default = false +} + +variable "size" { + default = null + description = "Deployment size" + nullable = true + type = string +} + +########################################## +# Database # +########################################## +variable "database_engine_version" { + description = "Version for MySQL Auora" + type = string + default = "8.0.mysql_aurora.3.05.2" +} + +variable "database_instance_class" { + description = "Instance type to use by database master instance." + type = string + default = "db.r5.large" +} + +variable "database_snapshot_identifier" { + description = "Specifies whether or not to create this cluster from a snapshot. 
You can use either the name or ARN when specifying a DB cluster snapshot, or the ARN when specifying a DB snapshot" + type = string + default = null +} + +variable "database_sort_buffer_size" { + description = "Specifies the sort_buffer_size value to set for the database" + type = number + default = 67108864 +} + +variable "database_name" { + description = "Specifies the name of the database" + type = string + default = "wandb_local" +} + +variable "database_port" { + description = "Specifies the port of the database" + type = string + default = "3306" +} + +variable "database_master_username" { + description = "Specifies the master_username value to set for the database" + type = string + default = "wandb" +} + +variable "database_master_password" { + description = "Specifies the master_password value to set for the database" + type = string + sensitive = true +} + +variable "database_endpoint" { + description = "Specifies the endpoint value to set for the database" + type = string +} + +variable "database_binlog_format" { + description = "Specifies the binlog_format value to set for the database" + type = string + default = "ROW" +} + +variable "database_innodb_lru_scan_depth" { + description = "Specifies the innodb_lru_scan_depth value to set for the database" + type = number + default = 128 +} + +variable "database_performance_insights_kms_key_arn" { + default = null + description = "Specifies an existing KMS key ARN to encrypt the performance insights data if performance_insights_enabled is was enabled out of band" + nullable = true + type = string +} + +variable "database_security_group_id" { + description = "Specifies the security group id value to set for the database" + type = string +} + +########################################## +# DNS # +########################################## +variable "public_access" { + type = bool + default = false + description = "Is this instance accessable a public domain." 
+} + +variable "external_dns" { + type = bool + default = false + description = "Using external DNS. A `subdomain` must also be specified if this value is true." +} + +variable "custom_domain_filter" { + description = "A custom domain filter to be used by external-dns instead of the default FQDN. If not set, the local FQDN is used." + type = string + default = null +} + +# Sometimes the domain name and zone name don't match, so let's explicitly ask for +# both. It also just makes life easier to have both, even though in most cases it may +# be redundant info. +# https://github.com/hashicorp/terraform-aws-terraform-enterprise/pull/41#issuecomment-563501858 +variable "zone_id" { + type = string + description = "Domain for creating the Weights & Biases subdomain on." +} + +variable "domain_name" { + type = string + description = "Domain for accessing the Weights & Biases UI." +} + +variable "subdomain" { + type = string + default = null + description = "Subdomain for accessing the Weights & Biases UI. Default creates record at Route53 Route." +} + +variable "enable_dummy_dns" { + type = bool + default = false + description = "Boolean indicating whether or not to enable dummy DNS for the old alb" +} + + +variable "enable_operator_alb" { + type = bool + default = false + description = "Boolean indicating whether to use operatore ALB (true) or not (false)." +} + +variable "extra_fqdn" { + type = list(string) + description = "Additional fqdn's must be in the same hosted zone as `domain_name`." + default = [] +} + +########################################## +# Load Balancer # +########################################## +variable "ssl_policy" { + type = string + default = "ELBSecurityPolicy-FS-1-2-Res-2020-10" + description = "SSL policy to use on ALB listener" +} + +variable "acm_certificate_arn" { + type = string + default = null + description = "The ARN of an existing ACM certificate." +} + +variable "allowed_inbound_cidr" { + description = "CIDRs allowed to access wandb-server."
+ nullable = false + type = list(string) +} + +variable "allowed_inbound_ipv6_cidr" { + description = "CIDRs allowed to access wandb-server." + nullable = false + type = list(string) +} + + +########################################## +# KMS # +########################################## +variable "kms_key_alias" { + type = string + description = "KMS key alias for AWS KMS Customer managed key." + default = null +} + +variable "kms_key_deletion_window" { + type = number + description = "Duration in days to destroy the key after it is deleted. Must be between 7 and 30 days." + default = 7 +} + +variable "kms_key_policy" { + type = string + description = "The policy that will define the permissions for the kms key." + default = "" +} + +########################################## +# Network # +########################################## +variable "create_vpc" { + type = bool + description = "Boolean indicating whether to deploy a VPC (true) or not (false)." + default = false +} + +variable "network_id" { + description = "The identity of the VPC in which resources will be deployed." + type = string +} + +variable "network_private_subnets" { + description = "A list of the identities of the private subnetworks in which resources will be deployed." + type = list(string) +} + +variable "network_public_subnets" { + description = "A list of the identities of the public subnetworks in which resources will be deployed." + type = list(string) +} + +variable "network_database_subnets" { + description = "A list of the identities of the database subnetworks in which resources will be deployed." + type = list(string) +} + +variable "network_elasticache_subnets" { + description = "A list of the identities of the subnetworks in which elasticache resources will be deployed." + type = list(string) +} + +variable "network_cidr" { + type = string + description = "CIDR block for VPC." 
+ default = "10.10.0.0/16" +} + +variable "network_public_subnet_cidrs" { + type = list(string) + description = "List of public subnet CIDR ranges to create in VPC." + default = ["10.10.0.0/24", "10.10.1.0/24"] +} + +variable "network_private_subnet_cidrs" { + type = list(string) + description = "List of private subnet CIDR ranges to create in VPC." + default = ["10.10.10.0/24", "10.10.11.0/24"] +} + +variable "network_database_subnet_cidrs" { + type = list(string) + description = "List of database subnet CIDR ranges to create in VPC." + default = ["10.10.20.0/24", "10.10.21.0/24"] +} + +variable "network_elasticache_subnet_cidrs" { + type = list(string) + description = "List of elasticache subnet CIDR ranges to create in VPC." + default = ["10.10.30.0/24", "10.10.31.0/24"] +} + +variable "private_link_allowed_account_ids" { + description = "List of AWS account IDs allowed to access the VPC Endpoint Service" + type = list(string) + default = [] +} + +########################################## +# EKS Cluster # +########################################## +variable "eks_cluster_version" { + description = "EKS cluster kubernetes version" + nullable = false + type = string +} +variable "kubernetes_alb_internet_facing" { + type = bool + description = "Indicates whether or not the ALB controlled by the Amazon ALB ingress controller is internet-facing or internal." + default = true +} + +variable "kubernetes_alb_subnets" { + type = list(string) + description = "List of subnet ID's the ALB will use for ingress traffic." + default = [] +} + +variable "kubernetes_public_access" { + type = bool + description = "Indicates whether or not the Amazon EKS public API server endpoint is enabled." + default = true +} + + +variable "kubernetes_public_access_cidrs" { + description = "List of CIDR blocks which can access the Amazon EKS public API server endpoint."
+ type = list(string) + default = [] +} + +variable "kubernetes_map_accounts" { + description = "Additional AWS account numbers to add to the aws-auth configmap." + type = list(string) + default = [] +} + +variable "kubernetes_map_roles" { + description = "Additional IAM roles to add to the aws-auth configmap." + type = list(object({ + rolearn = string + username = string + groups = list(string) + })) + default = [] +} + +variable "kubernetes_map_users" { + description = "Additional IAM users to add to the aws-auth configmap." + type = list(object({ + userarn = string + username = string + groups = list(string) + })) + default = [] +} + +variable "kubernetes_instance_types" { + description = "EC2 Instance type for primary node group." + type = list(string) + default = ["m5.large"] +} + +variable "kubernetes_node_count" { + description = "Number of nodes" + type = number + default = 2 +} + +variable "eks_policy_arns" { + type = list(string) + description = "Additional IAM policy to apply to the EKS cluster" + default = [] +} + +variable "system_reserved_cpu_millicores" { + description = "(Optional) The amount of 'system-reserved' CPU millicores to pass to the kubelet. For example: 100. A value of -1 disables the flag." + type = number + default = 70 +} + +variable "system_reserved_memory_megabytes" { + description = "(Optional) The amount of 'system-reserved' memory in megabytes to pass to the kubelet. For example: 100. A value of -1 disables the flag." + type = number + default = 100 +} + +variable "system_reserved_ephemeral_megabytes" { + description = "(Optional) The amount of 'system-reserved' ephemeral storage in megabytes to pass to the kubelet. For example: 1000. A value of -1 disables the flag." + type = number + default = 750 +} + +variable "system_reserved_pid" { + description = "(Optional) The amount of 'system-reserved' process ids [pid] to pass to the kubelet. For example: 1000. A value of -1 disables the flag." 
+ type = number + default = 500 +} + +variable "aws_loadbalancer_controller_tags" { + description = "(Optional) A map of AWS tags to apply to all resources managed by the load balancer controller" + type = map(string) + default = {} +} + +########################################## +# External Bucket # +########################################## +# Most users will not need these settings. They are ment for users who want a +# bucket and sqs that are in a different account. +variable "create_bucket" { + type = bool + default = true +} + +variable "bucket_name" { + type = string + default = "" +} + +variable "bucket_kms_key_arn" { + type = string + description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted." + default = "" +} + +########################################## +# Redis # +########################################## +variable "create_elasticache" { + type = bool + description = "Boolean indicating whether to provision an elasticache instance (true) or not (false)." + default = true +} + +variable "elasticache_node_type" { + description = "The type of the redis cache node to deploy" + type = string + default = "cache.t2.medium" +} + +# ########################################## +# # Weights & Biases # +# ########################################## +variable "license" { + type = string + description = "Weights & Biases license key." 
+} + +variable "other_wandb_env" { + type = map(any) + description = "Extra environment variables for W&B" + default = {} +} + +variable "weave_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} + +variable "app_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} + +variable "parquet_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} diff --git a/examples/standard/main.tf b/examples/standard/main.tf new file mode 100644 index 00000000..aae263ea --- /dev/null +++ b/examples/standard/main.tf @@ -0,0 +1,91 @@ +provider "aws" { + region = "us-east-1" + + + default_tags { + tags = { + GithubRepo = "terraform-aws-wandb" + GithubOrg = "wandb" + Enviroment = "Example" + Example = "Standard" + } + } +} + +module "wandb_infra" { + source = "../../" + + namespace = var.namespace + deletion_protection = false + size = "medium" + + database_instance_class = var.database_instance_class + database_engine_version = var.database_engine_version + database_snapshot_identifier = var.database_snapshot_identifier + database_sort_buffer_size = var.database_sort_buffer_size + + allowed_inbound_cidr = var.allowed_inbound_cidr + allowed_inbound_ipv6_cidr = ["::/0"] + + eks_cluster_version = var.eks_cluster_version + kubernetes_public_access = true + kubernetes_public_access_cidrs = ["0.0.0.0/0"] + + domain_name = var.domain_name + zone_id = var.zone_id + subdomain = var.subdomain + + license = var.license + + bucket_name = var.bucket_name + bucket_kms_key_arn = var.bucket_kms_key_arn +} + +data "aws_eks_cluster" "app_cluster" { + name = module.wandb_infra.cluster_id +} + +data "aws_eks_cluster_auth" "app_cluster" { + name = module.wandb_infra.cluster_id +} + +provider "kubernetes" { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = 
base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } +} + +provider "helm" { + kubernetes { + host = data.aws_eks_cluster.app_cluster.endpoint + cluster_ca_certificate = base64decode(data.aws_eks_cluster.app_cluster.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.app_cluster.token + exec { + api_version = "client.authentication.k8s.io/v1beta1" + args = ["eks", "get-token", "--cluster-name", data.aws_eks_cluster.app_cluster.name] + command = "aws" + } + } +} + + +output "bucket_name" { + value = module.wandb_infra.bucket_name +} + +output "bucket_queue_name" { + value = module.wandb_infra.bucket_queue_name +} + +output "database_connection_string" { + value = module.wandb_infra.database_connection_string +} + +output "elasticache_connection_string" { + value = module.wandb_infra.elasticache_connection_string +} diff --git a/examples/standard/variables.tf b/examples/standard/variables.tf new file mode 100644 index 00000000..b9c961c4 --- /dev/null +++ b/examples/standard/variables.tf @@ -0,0 +1,97 @@ +variable "namespace" { + type = string + description = "String used for prefix resources." +} + +variable "allowed_inbound_cidr" { + description = "CIDRs allowed to access wandb-server." + nullable = false + type = list(string) +} + +variable "zone_id" { + type = string + description = "Domain for creating the Weights & Biases subdomain on." +} + +variable "domain_name" { + type = string + description = "Domain for accessing the Weights & Biases UI." +} + +variable "subdomain" { + type = string + default = null + description = "Subdomain for accessing the Weights & Biases UI. Default creates record at Route53 Route." 
+} + +########################################## +# Database # +########################################## +variable "database_engine_version" { + description = "Version for MySQL Auora" + type = string + default = "8.0.mysql_aurora.3.05.2" +} + +variable "database_instance_class" { + description = "Instance type to use by database master instance." + type = string + default = "db.r5.large" +} + +variable "database_snapshot_identifier" { + description = "Specifies whether or not to create this cluster from a snapshot. You can use either the name or ARN when specifying a DB cluster snapshot, or the ARN when specifying a DB snapshot" + type = string + default = null +} + +variable "database_sort_buffer_size" { + description = "Specifies the sort_buffer_size value to set for the database" + type = number + default = 67108864 +} + +########################################## +# External Bucket # +########################################## +# Most users will not need these settings. They are ment for users who want a +# bucket and sqs that are in a different account. +variable "create_bucket" { + type = bool + default = true +} + +variable "bucket_name" { + type = string + default = "" +} + +variable "bucket_kms_key_arn" { + type = string + description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted." + default = "" +} + +########################################## +# EKS Cluster # +########################################## +variable "eks_cluster_version" { + description = "EKS cluster kubernetes version" + nullable = false + type = string +} + +########################################### +# Weights & Biases # +########################################### +variable "license" { + type = string + description = "Weights & Biases license key." 
+} + +variable "other_wandb_env" { + type = map(string) + description = "Extra environment variables for W&B" + default = {} +} \ No newline at end of file From 001cfe5110b2909d5c980fc11c095e1883afdc27 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 8 Jul 2024 06:57:51 +0000 Subject: [PATCH 40/72] chore(release): version 4.18.0 [skip ci] ## [4.18.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.17.0...v4.18.0) (2024-07-08) ### Features * Add example tf files for custom vpc, sql, redis, eks ([#208](https://github.com/wandb/terraform-aws-wandb/issues/208)) ([65411c2](https://github.com/wandb/terraform-aws-wandb/commit/65411c2488ee8c9edd744e6e6cc4e203487dea7f)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54f51b1b..fc2c434c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.18.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.17.0...v4.18.0) (2024-07-08) + + +### Features + +* Add example tf files for custom vpc, sql, redis, eks ([#208](https://github.com/wandb/terraform-aws-wandb/issues/208)) ([65411c2](https://github.com/wandb/terraform-aws-wandb/commit/65411c2488ee8c9edd744e6e6cc4e203487dea7f)) + ## [4.17.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.16.0...v4.17.0) (2024-06-26) From 08ed7faac3c1f18e264feb3f1864d37845520bb2 Mon Sep 17 00:00:00 2001 From: Aman Pruthi Date: Tue, 9 Jul 2024 10:31:55 +0530 Subject: [PATCH 41/72] feat: Resolved yace conflict (#239) Resolved yace conflict --- data.tf | 2 +- main.tf | 49 +------------------------------------------------ 2 files changed, 2 insertions(+), 49 deletions(-) diff --git a/data.tf b/data.tf index 21e6e2d0..b11404e0 100644 --- a/data.tf +++ b/data.tf @@ -9,4 +9,4 @@ data "aws_sqs_queue" "file_storage" { name = local.bucket_queue_name } -data "aws_region" "current" { } \ No newline at end of file +data "aws_region" 
"current" {} \ No newline at end of file diff --git a/main.tf b/main.tf index dfd091ec..31a7ca75 100644 --- a/main.tf +++ b/main.tf @@ -321,7 +321,7 @@ module "wandb" { }, var.app_wandb_env) } - # To support otel rds and redis metrics need operator-wandb chart minimum version 0.13.8 ( yace subchart) + # To support otel rds and redis metrics, we need operator-wandb chart min version 0.13.8 (yace subchart) yace = var.enable_yace ? { install = true regions = [data.aws_region.current.name] @@ -368,53 +368,6 @@ module "wandb" { } } - # To support otel rds and redis metrics need operator-wandb chart minimum version 0.13.8 ( yace subchart) - yace = var.enable_yace ? { - install = true - regions = [data.aws_region.current.name] - serviceAccount = { annotations = { "eks.amazonaws.com/role-arn" = module.iam_role[0].role_arn} } - } : { - install = false - regions = [] - serviceAccount = {} - } - - otel = { - daemonset = var.enable_yace ? { - config = { - receivers = { - prometheus = { - config = { - scrape_configs = [ - { job_name = "yace" - scheme = "http" - metrics_path = "/metrics" - dns_sd_configs = [ - { names = ["yace"] - type = "A" - port = 5000 - } - ] - } - ] - } - } - } - service = { - pipelines = { - metrics = { - receivers = ["hostmetrics", "k8s_cluster", "kubeletstats", "prometheus"] - } - } - } - } - } : { config = { - receivers = {} - service = {} - } - } - } - mysql = { install = false } redis = { install = false } From de649d55d8f05b4c741e5eaf3b9bf1fb4e3c1b32 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 9 Jul 2024 05:02:24 +0000 Subject: [PATCH 42/72] chore(release): version 4.19.0 [skip ci] ## [4.19.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.18.0...v4.19.0) (2024-07-09) ### Features * Resolved yace conflict ([#239](https://github.com/wandb/terraform-aws-wandb/issues/239)) ([08ed7fa](https://github.com/wandb/terraform-aws-wandb/commit/08ed7faac3c1f18e264feb3f1864d37845520bb2)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 
insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc2c434c..0ebb69d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.19.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.18.0...v4.19.0) (2024-07-09) + + +### Features + +* Resolved yace conflict ([#239](https://github.com/wandb/terraform-aws-wandb/issues/239)) ([08ed7fa](https://github.com/wandb/terraform-aws-wandb/commit/08ed7faac3c1f18e264feb3f1864d37845520bb2)) + ## [4.18.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.17.0...v4.18.0) (2024-07-08) From bc7c957307a852c94a6f6f4400a215101052fcac Mon Sep 17 00:00:00 2001 From: Aastha Gupta <71313011+velotioaastha@users.noreply.github.com> Date: Wed, 10 Jul 2024 08:06:18 -0400 Subject: [PATCH 43/72] feat: Support for encrypting the database and bucket with CMK (#182) * added changed to add encrytion feature for s3 and db, with same and different kms keys and with external kms keys and external bucket. * added changed to add encrytion feature for s3 and db, with same and different kms keys and with external kms keys and external bucket. * added changed to add encrytion feature for s3 and db, with same and different kms keys and with external kms keys and external bucket. * added changes for encryption as per the new requirements. * added changes for encryption as per the new requirements. * Added readme doc for keys encryption * Resolved PR comments * removed create_kms boolean and updated conditions * added changes to update validation block. * added changes to update validation block. * added changed to add encrytion feature for s3 and db, with same and different kms keys and with external kms keys and external bucket. * added changed to add encrytion feature for s3 and db, with same and different kms keys and with external kms keys and external bucket. 
* added changed to add encrytion feature for s3 and db, with same and different kms keys and with external kms keys and external bucket. * added changes for encryption as per the new requirements. * added changes for encryption as per the new requirements. * Added readme doc for keys encryption * Resolved PR comments * removed create_kms boolean and updated conditions * added changes to update validation block. * added changes to update validation block. * update db kms key arn validation * fix terraform lint --------- Co-authored-by: Aastha Gupta Co-authored-by: Aastha Gupta --- README.md | 29 ++++++++++++++++++++++++ main.tf | 37 ++++++++++++++++-------------- modules/app_eks/iam-policies.tf | 6 ++--- modules/app_eks/iam-roles.tf | 8 +++---- modules/database/main.tf | 2 +- modules/iam_role/main.tf | 14 ++++++------ outputs.tf | 2 +- variables.tf | 40 ++++++++++++++++++++++----------- versions.tf | 2 +- 9 files changed, 93 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 3f9de70e..6f39dde6 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,35 @@ module "wandb" { } ``` +### Alow customer specific customer-managed keys for S3 and RDS +- we can provide external kms key to encrypt database, redis and S3 buckets. +- To provide kms keys we need to provide kms arn values in +``` +db_kms_key_arn +bucket_kms_key_arn +``` +### In order to allow cross account KMS keys. we need to allow kms keys to be accessed by WandB account. +this can be donw by adding the following policy document. 
+``` +{ + "Sid": "Allow use of the key", + "Effect": "Allow", + "Principal": { + "AWS": [ + "arn:aws:iam:::root" + ] + }, + "Action": [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ], + "Resource": "*" + } +``` + ### Upgrading from 2.x -> 3.x - No changes required by you diff --git a/main.tf b/main.tf index 31a7ca75..b3ce58c3 100644 --- a/main.tf +++ b/main.tf @@ -8,20 +8,23 @@ module "kms" { } locals { - kms_key_arn = module.kms.key.arn - use_external_bucket = var.bucket_name != "" - use_internal_queue = local.use_external_bucket || var.use_internal_queue + + default_kms_key = module.kms.key.arn + s3_kms_key_arn = length(var.bucket_kms_key_arn) > 0 ? var.bucket_kms_key_arn : local.default_kms_key + db_kms_key_arn = length(var.db_kms_key_arn) > 0 ? var.db_kms_key_arn : local.default_kms_key + database_performance_insights_kms_key_arn = length(var.database_performance_insights_kms_key_arn) > 0 ? var.database_performance_insights_kms_key_arn : local.default_kms_key + use_external_bucket = var.bucket_name != "" + use_internal_queue = local.use_external_bucket || var.use_internal_queue } module "file_storage" { - count = var.create_bucket ? 1 : 0 - source = "./modules/file_storage" - - create_queue = !local.use_internal_queue + count = var.create_bucket ? 
1 : 0 + source = "./modules/file_storage" + namespace = var.namespace + create_queue = !local.use_internal_queue + sse_algorithm = "aws:kms" + kms_key_arn = local.s3_kms_key_arn deletion_protection = var.deletion_protection - kms_key_arn = local.kms_key_arn - namespace = var.namespace - sse_algorithm = "aws:kms" } locals { @@ -68,8 +71,8 @@ module "database" { source = "./modules/database" namespace = var.namespace - kms_key_arn = local.kms_key_arn - performance_insights_kms_key_arn = var.database_performance_insights_kms_key_arn + kms_key_arn = local.db_kms_key_arn + performance_insights_kms_key_arn = local.database_performance_insights_kms_key_arn database_name = var.database_name master_username = var.database_master_username @@ -95,7 +98,7 @@ locals { fqdn = var.subdomain == null ? var.domain_name : "${var.subdomain}.${var.domain_name}" } -# Create SSL Ceritifcation if applicable +#Create SSL Ceritifcation if applicable module "acm" { source = "terraform-aws-modules/acm/aws" version = "~> 3.0" @@ -124,7 +127,7 @@ module "app_eks" { fqdn = local.domain_filter namespace = var.namespace - kms_key_arn = local.kms_key_arn + kms_key_arn = local.default_kms_key instance_types = try([local.deployment_size[var.size].node_instance], var.kubernetes_instance_types) desired_capacity = try(local.deployment_size[var.size].node_count, var.kubernetes_node_count) @@ -132,7 +135,7 @@ module "app_eks" { map_roles = var.kubernetes_map_roles map_users = var.kubernetes_map_users - bucket_kms_key_arn = local.use_external_bucket ? var.bucket_kms_key_arn : local.kms_key_arn + bucket_kms_key_arn = local.s3_kms_key_arn bucket_arn = data.aws_s3_bucket.file_storage.arn bucket_sqs_queue_arn = local.use_internal_queue ? 
null : data.aws_sqs_queue.file_storage.0.arn @@ -227,7 +230,7 @@ module "redis" { redis_subnet_group_name = local.network_elasticache_subnet_group_name vpc_subnets_cidr_blocks = local.network_elasticache_subnet_cidrs node_type = try(local.deployment_size[var.size].cache, var.elasticache_node_type) - kms_key_arn = local.kms_key_arn + kms_key_arn = local.db_kms_key_arn } locals { @@ -267,7 +270,7 @@ module "wandb" { provider = "s3" name = local.bucket_name region = data.aws_s3_bucket.file_storage.region - kmsKey = local.use_external_bucket ? var.bucket_kms_key_arn : local.kms_key_arn + kmsKey = local.s3_kms_key_arn } mysql = { diff --git a/modules/app_eks/iam-policies.tf b/modules/app_eks/iam-policies.tf index 6ce0528a..a46b9ecd 100644 --- a/modules/app_eks/iam-policies.tf +++ b/modules/app_eks/iam-policies.tf @@ -48,13 +48,13 @@ resource "aws_iam_policy" "secrets_manager" { resource "aws_iam_policy" "irsa" { name = "${var.namespace}-irsa-policy" description = "IRSA IAM Policy" - + policy = jsonencode({ Version = "2012-10-17" Statement = [ { - Effect = "Allow" - Action = [ + Effect = "Allow" + Action = [ "s3:*", "kms:*", ] diff --git a/modules/app_eks/iam-roles.tf b/modules/app_eks/iam-roles.tf index 9654b4ce..fd2dfc4d 100644 --- a/modules/app_eks/iam-roles.tf +++ b/modules/app_eks/iam-roles.tf @@ -8,15 +8,15 @@ resource "aws_iam_role" "node" { resource "aws_iam_role" "irsa" { name = "${var.namespace}-irsa-role" assume_role_policy = jsonencode({ - Version = "2012-10-17" + Version = "2012-10-17" Statement = [ { - Sid = "" - Effect = "Allow" + Sid = "" + Effect = "Allow" Principal = { Federated = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${aws_iam_openid_connect_provider.eks.url}" } - Action = "sts:AssumeRoleWithWebIdentity" + Action = "sts:AssumeRoleWithWebIdentity" Condition = { StringLike = { "${aws_iam_openid_connect_provider.eks.url}:sub" = "system:serviceaccount:${var.namespace}:*" diff --git a/modules/database/main.tf 
b/modules/database/main.tf index c5d7b8bb..f60e984d 100644 --- a/modules/database/main.tf +++ b/modules/database/main.tf @@ -9,7 +9,7 @@ resource "random_string" "master_password" { } locals { - engine_version_tag = "80" + engine_version_tag = "80" parameter_family = "aurora-mysql8.0" parameter_group_name = "${var.namespace}-aurora-db-${local.engine_version_tag}-parameter-group" parameter_cluster_name = "${var.namespace}-aurora-${local.engine_version_tag}-cluster-parameter-group" diff --git a/modules/iam_role/main.tf b/modules/iam_role/main.tf index 68005c22..42c15c0c 100644 --- a/modules/iam_role/main.tf +++ b/modules/iam_role/main.tf @@ -3,15 +3,15 @@ data "aws_caller_identity" "current" {} resource "aws_iam_role" "irsa" { name = "${var.namespace}-yace-irsa-role" assume_role_policy = jsonencode({ - Version = "2012-10-17" + Version = "2012-10-17" Statement = [ { - Sid = "" - Effect = "Allow" + Sid = "" + Effect = "Allow" Principal = { Federated = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:oidc-provider/${var.aws_iam_openid_connect_provider_url}" } - Action = ["sts:AssumeRoleWithWebIdentity"] + Action = ["sts:AssumeRoleWithWebIdentity"] Condition = { StringLike = { "${var.aws_iam_openid_connect_provider_url}:sub" = "system:serviceaccount:default:${var.yace_sa_name}" @@ -27,13 +27,13 @@ resource "aws_iam_role" "irsa" { resource "aws_iam_policy" "irsa" { name = "${var.namespace}-yace-irsa-policy" description = "IRSA IAM Policy" - + policy = jsonencode({ Version = "2012-10-17" Statement = [ { - Effect = "Allow" - Action = [ + Effect = "Allow" + Action = [ "tag:GetResources", "cloudwatch:GetMetricData", "cloudwatch:GetMetricStatistics", diff --git a/outputs.tf b/outputs.tf index ed522703..188e49f1 100644 --- a/outputs.tf +++ b/outputs.tf @@ -48,7 +48,7 @@ output "internal_app_port" { } output "kms_key_arn" { - value = local.kms_key_arn + value = local.default_kms_key description = "The Amazon Resource Name of the KMS key used to encrypt data at rest." 
} diff --git a/variables.tf b/variables.tf index 2f9aacc6..2e5ddf2c 100644 --- a/variables.tf +++ b/variables.tf @@ -1,6 +1,6 @@ -########################################## -# Common # -########################################## +# ########################################## +# # Common # +# ########################################## variable "namespace" { type = string description = "String used for prefix resources." @@ -76,7 +76,7 @@ variable "database_innodb_lru_scan_depth" { } variable "database_performance_insights_kms_key_arn" { - default = null + default = "" description = "Specifies an existing KMS key ARN to encrypt the performance insights data if performance_insights_enabled is was enabled out of band" nullable = true type = string @@ -401,11 +401,13 @@ variable "bucket_name" { type = string default = "" } - variable "bucket_kms_key_arn" { - type = string - description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted." - default = "" + type = string + default = "" + validation { + condition = can(regex("^arn:aws:kms:[a-z0-9-]+:[0-9]+:key/[a-zA-Z0-9-_]+$", var.bucket_kms_key_arn)) || var.bucket_kms_key_arn == "" + error_message = "Invalid value for bucket kms ARN" + } } ########################################## @@ -423,9 +425,9 @@ variable "elasticache_node_type" { default = "cache.t2.medium" } -# ########################################## -# # Weights & Biases # -# ########################################## +########################################## +# Weights & Biases # +########################################## variable "license" { type = string description = "Weights & Biases license key." 
@@ -456,12 +458,24 @@ variable "parquet_wandb_env" { } variable "enable_yace" { - type = bool + type = bool description = "deploy yet another cloudwatch exporter to fetch aws resources metrics" - default = true + default = true } variable "yace_sa_name" { type = string default = "wandb-yace" +} + +########################################## +# New Vars for Encryption # +########################################## +variable "db_kms_key_arn" { + type = string + default = "" + validation { + condition = can(regex("^arn:aws:kms:[a-z0-9-]+:[0-9]+:key/[a-zA-Z0-9-_]+$", var.db_kms_key_arn)) || var.db_kms_key_arn == "" + error_message = "Invalid value for db kms ARN" + } } \ No newline at end of file diff --git a/versions.tf b/versions.tf index 1ffbef44..f13ff0d0 100644 --- a/versions.tf +++ b/versions.tf @@ -10,4 +10,4 @@ terraform { version = "~> 2.23" } } -} +} \ No newline at end of file From 3d817815a32b200bbccc355e833f3149565c1a85 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 10 Jul 2024 12:06:52 +0000 Subject: [PATCH 44/72] chore(release): version 4.20.0 [skip ci] ## [4.20.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.19.0...v4.20.0) (2024-07-10) ### Features * Support for encrypting the database and bucket with CMK ([#182](https://github.com/wandb/terraform-aws-wandb/issues/182)) ([bc7c957](https://github.com/wandb/terraform-aws-wandb/commit/bc7c957307a852c94a6f6f4400a215101052fcac)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ebb69d3..4ffa4711 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.20.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.19.0...v4.20.0) (2024-07-10) + + +### Features + +* Support for encrypting the database and bucket with CMK ([#182](https://github.com/wandb/terraform-aws-wandb/issues/182)) ([bc7c957](https://github.com/wandb/terraform-aws-wandb/commit/bc7c957307a852c94a6f6f4400a215101052fcac)) + ## [4.19.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.18.0...v4.19.0) (2024-07-09) From 91017d4e1d21140be24102b7e5129b4498183749 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Thu, 11 Jul 2024 07:24:05 -0700 Subject: [PATCH 45/72] fix: Pass cloudprovider value to the helm charts (#240) --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index b3ce58c3..b7966bcc 100644 --- a/main.tf +++ b/main.tf @@ -263,7 +263,7 @@ module "wandb" { global = { host = local.url license = var.license - + cloudProvider = "aws" extraEnv = var.other_wandb_env bucket = { From 7c5a2d43dfab0552dfdec59d0cd11fce52c51dba Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 11 Jul 2024 14:24:34 +0000 Subject: [PATCH 46/72] chore(release): version 4.20.1 [skip ci] ### [4.20.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.0...v4.20.1) (2024-07-11) ### Bug Fixes * Pass cloudprovider value to the helm charts ([#240](https://github.com/wandb/terraform-aws-wandb/issues/240)) ([91017d4](https://github.com/wandb/terraform-aws-wandb/commit/91017d4e1d21140be24102b7e5129b4498183749)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ffa4711..cb2a02e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.20.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.0...v4.20.1) (2024-07-11) + + +### Bug Fixes + +* Pass cloudprovider value to the helm charts ([#240](https://github.com/wandb/terraform-aws-wandb/issues/240)) ([91017d4](https://github.com/wandb/terraform-aws-wandb/commit/91017d4e1d21140be24102b7e5129b4498183749)) + ## [4.20.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.19.0...v4.20.0) (2024-07-10) From 7aba49119e24ffe68bc7e35dddde127040bfef3e Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Thu, 11 Jul 2024 09:42:52 -0500 Subject: [PATCH 47/72] fix: AWS VPC CNI revert (#236) * fix: AWS VPC CNI revert * Add Depends on. --- modules/app_eks/add-ons.tf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/app_eks/add-ons.tf b/modules/app_eks/add-ons.tf index 56503d6c..3db67c0f 100644 --- a/modules/app_eks/add-ons.tf +++ b/modules/app_eks/add-ons.tf @@ -72,9 +72,12 @@ resource "aws_eks_addon" "kube_proxy" { } resource "aws_eks_addon" "vpc_cni" { + depends_on = [ + module.eks + ] cluster_name = var.namespace addon_name = "vpc-cni" - addon_version = "v1.18.0-eksbuild.1" + addon_version = "v1.18.2-eksbuild.1" resolve_conflicts = "OVERWRITE" service_account_role_arn = aws_iam_role.oidc.arn } From a745466564d82aff498710a7d60c8c1450d31d0e Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 11 Jul 2024 14:43:19 +0000 Subject: [PATCH 48/72] chore(release): version 4.20.2 [skip ci] ### [4.20.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.1...v4.20.2) (2024-07-11) ### Bug Fixes * AWS VPC CNI revert ([#236](https://github.com/wandb/terraform-aws-wandb/issues/236)) ([7aba491](https://github.com/wandb/terraform-aws-wandb/commit/7aba49119e24ffe68bc7e35dddde127040bfef3e)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb2a02e1..9c26373a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.20.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.1...v4.20.2) (2024-07-11) + + +### Bug Fixes + +* AWS VPC CNI revert ([#236](https://github.com/wandb/terraform-aws-wandb/issues/236)) ([7aba491](https://github.com/wandb/terraform-aws-wandb/commit/7aba49119e24ffe68bc7e35dddde127040bfef3e)) + ### [4.20.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.0...v4.20.1) (2024-07-11) From 8f20d3e3a455f348c2f9eb11582ffff592929cf7 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Thu, 11 Jul 2024 10:24:44 -0500 Subject: [PATCH 49/72] fix: Naming Conventions (#241) * fix: Naming Conventions * formating --- README.md | 25 +++++++++++++++++-------- main.tf | 24 ++++++++++++------------ variables.tf | 29 ++++++++++++----------------- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 6f39dde6..4a06731d 100644 --- a/README.md +++ b/README.md @@ -112,10 +112,6 @@ Upgrades must be executed in step-wise fashion from one version to the next. You -### Notes on EKS Add-ons -If a terraform apply fails because an add-on is already installed, remove the add-on using the AWS console or the AWS -CLI and re-run the apply. Running pods will not be impacted. - ## Requirements | Name | Version | @@ -139,16 +135,19 @@ CLI and re-run the apply. Running pods will not be impacted. 
| [app\_lb](#module\_app\_lb) | ./modules/app_lb | n/a | | [database](#module\_database) | ./modules/database | n/a | | [file\_storage](#module\_file\_storage) | ./modules/file_storage | n/a | +| [iam\_role](#module\_iam\_role) | ./modules/iam_role | n/a | | [kms](#module\_kms) | ./modules/kms | n/a | | [networking](#module\_networking) | ./modules/networking | n/a | | [private\_link](#module\_private\_link) | ./modules/private_link | n/a | | [redis](#module\_redis) | ./modules/redis | n/a | +| [s3\_endpoint](#module\_s3\_endpoint) | ./modules/endpoint | n/a | | [wandb](#module\_wandb) | wandb/wandb/helm | 1.2.0 | ## Resources | Name | Type | |------|------| +| [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | | [aws_s3_bucket.file_storage](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | | [aws_sqs_queue.file_storage](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/sqs_queue) | data source | @@ -159,9 +158,10 @@ CLI and re-run the apply. Running pods will not be impacted. | [acm\_certificate\_arn](#input\_acm\_certificate\_arn) | The ARN of an existing ACM certificate. | `string` | `null` | no | | [allowed\_inbound\_cidr](#input\_allowed\_inbound\_cidr) | CIDRs allowed to access wandb-server. | `list(string)` | n/a | yes | | [allowed\_inbound\_ipv6\_cidr](#input\_allowed\_inbound\_ipv6\_cidr) | CIDRs allowed to access wandb-server. | `list(string)` | n/a | yes | +| [allowed\_private\_endpoint\_cidr](#input\_allowed\_private\_endpoint\_cidr) | Private CIDRs allowed to access wandb-server. 
| `list(string)` | `[]` | no | | [app\_wandb\_env](#input\_app\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | | [aws\_loadbalancer\_controller\_tags](#input\_aws\_loadbalancer\_controller\_tags) | (Optional) A map of AWS tags to apply to all resources managed by the load balancer controller | `map(string)` | `{}` | no | -| [bucket\_kms\_key\_arn](#input\_bucket\_kms\_key\_arn) | The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted. | `string` | `""` | no | +| [bucket\_kms\_key\_arn](#input\_bucket\_kms\_key\_arn) | n/a | `string` | `""` | no | | [bucket\_name](#input\_bucket\_name) | n/a | `string` | `""` | no | | [create\_bucket](#input\_create\_bucket) | ######################################### External Bucket # ######################################### Most users will not need these settings. They are ment for users who want a bucket and sqs that are in a different account. | `bool` | `true` | no | | [create\_elasticache](#input\_create\_elasticache) | Boolean indicating whether to provision an elasticache instance (true) or not (false). | `bool` | `true` | no | @@ -171,9 +171,10 @@ CLI and re-run the apply. Running pods will not be impacted. | [database\_engine\_version](#input\_database\_engine\_version) | Version for MySQL Auora | `string` | `"8.0.mysql_aurora.3.05.2"` | no | | [database\_innodb\_lru\_scan\_depth](#input\_database\_innodb\_lru\_scan\_depth) | Specifies the innodb\_lru\_scan\_depth value to set for the database | `number` | `128` | no | | [database\_instance\_class](#input\_database\_instance\_class) | Instance type to use by database master instance. 
| `string` | `"db.r5.large"` | no | +| [database\_kms\_key\_arn](#input\_database\_kms\_key\_arn) | n/a | `string` | `""` | no | | [database\_master\_username](#input\_database\_master\_username) | Specifies the master\_username value to set for the database | `string` | `"wandb"` | no | | [database\_name](#input\_database\_name) | Specifies the name of the database | `string` | `"wandb_local"` | no | -| [database\_performance\_insights\_kms\_key\_arn](#input\_database\_performance\_insights\_kms\_key\_arn) | Specifies an existing KMS key ARN to encrypt the performance insights data if performance\_insights\_enabled is was enabled out of band | `string` | `null` | no | +| [database\_performance\_insights\_kms\_key\_arn](#input\_database\_performance\_insights\_kms\_key\_arn) | Specifies an existing KMS key ARN to encrypt the performance insights data if performance\_insights\_enabled is was enabled out of band | `string` | `""` | no | | [database\_snapshot\_identifier](#input\_database\_snapshot\_identifier) | Specifies whether or not to create this cluster from a snapshot. You can use either the name or ARN when specifying a DB cluster snapshot, or the ARN when specifying a DB snapshot | `string` | `null` | no | | [database\_sort\_buffer\_size](#input\_database\_sort\_buffer\_size) | Specifies the sort\_buffer\_size value to set for the database | `number` | `67108864` | no | | [deletion\_protection](#input\_deletion\_protection) | If the instance should have deletion protection enabled. The database / S3 can't be deleted when this value is set to `true`. | `bool` | `true` | no | @@ -183,6 +184,7 @@ CLI and re-run the apply. Running pods will not be impacted. 
| [elasticache\_node\_type](#input\_elasticache\_node\_type) | The type of the redis cache node to deploy | `string` | `"cache.t2.medium"` | no | | [enable\_dummy\_dns](#input\_enable\_dummy\_dns) | Boolean indicating whether or not to enable dummy DNS for the old alb | `bool` | `false` | no | | [enable\_operator\_alb](#input\_enable\_operator\_alb) | Boolean indicating whether to use operatore ALB (true) or not (false). | `bool` | `false` | no | +| [enable\_yace](#input\_enable\_yace) | deploy yet another cloudwatch exporter to fetch aws resources metrics | `bool` | `true` | no | | [external\_dns](#input\_external\_dns) | Using external DNS. A `subdomain` must also be specified if this value is true. | `bool` | `false` | no | | [extra\_fqdn](#input\_extra\_fqdn) | Additional fqdn's must be in the same hosted zone as `domain_name`. | `list(string)` | `[]` | no | | [kms\_key\_alias](#input\_kms\_key\_alias) | KMS key alias for AWS KMS Customer managed key. | `string` | `null` | no | @@ -212,6 +214,7 @@ CLI and re-run the apply. Running pods will not be impacted. | [other\_wandb\_env](#input\_other\_wandb\_env) | Extra environment variables for W&B | `map(any)` | `{}` | no | | [parquet\_wandb\_env](#input\_parquet\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | | [private\_link\_allowed\_account\_ids](#input\_private\_link\_allowed\_account\_ids) | List of AWS account IDs allowed to access the VPC Endpoint Service | `list(string)` | `[]` | no | +| [private\_only\_traffic](#input\_private\_only\_traffic) | Enable private only traffic from customer private network | `bool` | `false` | no | | [public\_access](#input\_public\_access) | Is this instance accessable a public domain. 
| `bool` | `false` | no | | [size](#input\_size) | Deployment size | `string` | `null` | no | | [ssl\_policy](#input\_ssl\_policy) | SSL policy to use on ALB listener | `string` | `"ELBSecurityPolicy-FS-1-2-Res-2020-10"` | no | @@ -222,6 +225,7 @@ CLI and re-run the apply. Running pods will not be impacted. | [system\_reserved\_pid](#input\_system\_reserved\_pid) | (Optional) The amount of 'system-reserved' process ids [pid] to pass to the kubelet. For example: 1000. A value of -1 disables the flag. | `number` | `500` | no | | [use\_internal\_queue](#input\_use\_internal\_queue) | n/a | `bool` | `false` | no | | [weave\_wandb\_env](#input\_weave\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | +| [yace\_sa\_name](#input\_yace\_sa\_name) | n/a | `string` | `"wandb-yace"` | no | | [zone\_id](#input\_zone\_id) | Domain for creating the Weights & Biases subdomain on. | `string` | n/a | yes | ## Outputs @@ -273,14 +277,19 @@ module "wandb" { ``` ### Alow customer specific customer-managed keys for S3 and RDS + - we can provide external kms key to encrypt database, redis and S3 buckets. -- To provide kms keys we need to provide kms arn values in +- To provide kms keys we need to provide kms arn values in + ``` -db_kms_key_arn +database_kms_key_arn bucket_kms_key_arn ``` + ### In order to allow cross account KMS keys. we need to allow kms keys to be accessed by WandB account. + this can be donw by adding the following policy document. + ``` { "Sid": "Allow use of the key", diff --git a/main.tf b/main.tf index b7966bcc..c91e230f 100644 --- a/main.tf +++ b/main.tf @@ -11,19 +11,19 @@ locals { default_kms_key = module.kms.key.arn s3_kms_key_arn = length(var.bucket_kms_key_arn) > 0 ? var.bucket_kms_key_arn : local.default_kms_key - db_kms_key_arn = length(var.db_kms_key_arn) > 0 ? var.db_kms_key_arn : local.default_kms_key + database_kms_key_arn = length(var.database_kms_key_arn) > 0 ? 
var.database_kms_key_arn : local.default_kms_key database_performance_insights_kms_key_arn = length(var.database_performance_insights_kms_key_arn) > 0 ? var.database_performance_insights_kms_key_arn : local.default_kms_key use_external_bucket = var.bucket_name != "" use_internal_queue = local.use_external_bucket || var.use_internal_queue } module "file_storage" { - count = var.create_bucket ? 1 : 0 - source = "./modules/file_storage" - namespace = var.namespace - create_queue = !local.use_internal_queue - sse_algorithm = "aws:kms" - kms_key_arn = local.s3_kms_key_arn + count = var.create_bucket ? 1 : 0 + source = "./modules/file_storage" + namespace = var.namespace + create_queue = !local.use_internal_queue + sse_algorithm = "aws:kms" + kms_key_arn = local.s3_kms_key_arn deletion_protection = var.deletion_protection } @@ -71,7 +71,7 @@ module "database" { source = "./modules/database" namespace = var.namespace - kms_key_arn = local.db_kms_key_arn + kms_key_arn = local.database_kms_key_arn performance_insights_kms_key_arn = local.database_performance_insights_kms_key_arn database_name = var.database_name @@ -230,7 +230,7 @@ module "redis" { redis_subnet_group_name = local.network_elasticache_subnet_group_name vpc_subnets_cidr_blocks = local.network_elasticache_subnet_cidrs node_type = try(local.deployment_size[var.size].cache, var.elasticache_node_type) - kms_key_arn = local.db_kms_key_arn + kms_key_arn = local.database_kms_key_arn } locals { @@ -261,10 +261,10 @@ module "wandb" { spec = { values = { global = { - host = local.url - license = var.license + host = local.url + license = var.license cloudProvider = "aws" - extraEnv = var.other_wandb_env + extraEnv = var.other_wandb_env bucket = { provider = "s3" diff --git a/variables.tf b/variables.tf index 2e5ddf2c..fd73db38 100644 --- a/variables.tf +++ b/variables.tf @@ -80,7 +80,14 @@ variable "database_performance_insights_kms_key_arn" { description = "Specifies an existing KMS key ARN to encrypt the performance 
insights data if performance_insights_enabled is was enabled out of band" nullable = true type = string - +} +variable "database_kms_key_arn" { + type = string + default = "" + validation { + condition = can(regex("^arn:aws:kms:[a-z0-9-]+:[0-9]+:key/[a-zA-Z0-9-_]+$", var.database_kms_key_arn)) || var.database_kms_key_arn == "" + error_message = "Invalid value for db kms ARN" + } } ########################################## @@ -271,13 +278,13 @@ variable "allowed_private_endpoint_cidr" { description = "Private CIDRs allowed to access wandb-server." nullable = false type = list(string) - default = [] + default = [] } variable "private_only_traffic" { description = "Enable private only traffic from customer private network" - type = bool - default = false + type = bool + default = false } ########################################## @@ -464,18 +471,6 @@ variable "enable_yace" { } variable "yace_sa_name" { - type = string + type = string default = "wandb-yace" } - -########################################## -# New Vars for Encryption # -########################################## -variable "db_kms_key_arn" { - type = string - default = "" - validation { - condition = can(regex("^arn:aws:kms:[a-z0-9-]+:[0-9]+:key/[a-zA-Z0-9-_]+$", var.db_kms_key_arn)) || var.db_kms_key_arn == "" - error_message = "Invalid value for db kms ARN" - } -} \ No newline at end of file From e03de160f4b4e4e2ad1f0229a71942f3e9c409fe Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 11 Jul 2024 15:25:13 +0000 Subject: [PATCH 50/72] chore(release): version 4.20.3 [skip ci] ### [4.20.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.2...v4.20.3) (2024-07-11) ### Bug Fixes * Naming Conventions ([#241](https://github.com/wandb/terraform-aws-wandb/issues/241)) ([8f20d3e](https://github.com/wandb/terraform-aws-wandb/commit/8f20d3e3a455f348c2f9eb11582ffff592929cf7)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
9c26373a..6ff8e3ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.20.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.2...v4.20.3) (2024-07-11) + + +### Bug Fixes + +* Naming Conventions ([#241](https://github.com/wandb/terraform-aws-wandb/issues/241)) ([8f20d3e](https://github.com/wandb/terraform-aws-wandb/commit/8f20d3e3a455f348c2f9eb11582ffff592929cf7)) + ### [4.20.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.1...v4.20.2) (2024-07-11) From 1d2fb921792019b6356e0f89b7c117dda168339a Mon Sep 17 00:00:00 2001 From: Nick Penaranda Date: Wed, 17 Jul 2024 19:05:43 -0400 Subject: [PATCH 51/72] feat: Set up KMS key for clickhouse CMEK and endpoint for PL (#243) set up KMS key for clickhouse CMEK and endpoint for PL --- main.tf | 19 ++++++---- modules/kms/main.tf | 66 +++++++++++++++++++++++++++++++++ modules/kms/outputs.tf | 10 ++++- modules/kms/variables.tf | 13 ++++++- modules/networking/main.tf | 10 +++++ modules/networking/variables.tf | 8 +++- outputs.tf | 6 +++ variables.tf | 18 +++++++++ 8 files changed, 139 insertions(+), 11 deletions(-) diff --git a/main.tf b/main.tf index c91e230f..b3043a2e 100644 --- a/main.tf +++ b/main.tf @@ -1,15 +1,19 @@ module "kms" { source = "./modules/kms" - key_alias = var.kms_key_alias == null ? "${var.namespace}-kms-alias" : var.kms_key_alias key_deletion_window = var.kms_key_deletion_window + key_alias = var.kms_key_alias == null ? "${var.namespace}-kms-alias" : var.kms_key_alias key_policy = var.kms_key_policy + + clickhouse_key_alias = var.kms_clickhouse_key_alias == null ? "${var.namespace}-kms-clickhouse-alias" : var.kms_clickhouse_key_alias + clickhouse_key_policy = var.kms_clickhouse_key_policy } locals { default_kms_key = module.kms.key.arn + clickhouse_kms_key = module.kms.clickhouse_key.arn s3_kms_key_arn = length(var.bucket_kms_key_arn) > 0 ? 
var.bucket_kms_key_arn : local.default_kms_key database_kms_key_arn = length(var.database_kms_key_arn) > 0 ? var.database_kms_key_arn : local.default_kms_key database_performance_insights_kms_key_arn = length(var.database_performance_insights_kms_key_arn) > 0 ? var.database_performance_insights_kms_key_arn : local.default_kms_key @@ -37,12 +41,13 @@ module "networking" { namespace = var.namespace create_vpc = var.create_vpc - cidr = var.network_cidr - private_subnet_cidrs = var.network_private_subnet_cidrs - public_subnet_cidrs = var.network_public_subnet_cidrs - database_subnet_cidrs = var.network_database_subnet_cidrs - create_elasticache_subnet = var.create_elasticache - elasticache_subnet_cidrs = var.network_elasticache_subnet_cidrs + cidr = var.network_cidr + private_subnet_cidrs = var.network_private_subnet_cidrs + public_subnet_cidrs = var.network_public_subnet_cidrs + database_subnet_cidrs = var.network_database_subnet_cidrs + create_elasticache_subnet = var.create_elasticache + elasticache_subnet_cidrs = var.network_elasticache_subnet_cidrs + clickhouse_endpoint_service_id = var.clickhouse_endpoint_service_id } locals { diff --git a/modules/kms/main.tf b/modules/kms/main.tf index 75739974..d600ae34 100644 --- a/modules/kms/main.tf +++ b/modules/kms/main.tf @@ -89,3 +89,69 @@ resource "aws_kms_grant" "main" { "ReEncryptTo", ] } + +resource "aws_kms_key" "clickhouse_key" { + deletion_window_in_days = var.key_deletion_window + description = "AWS KMS Customer-managed key to encrypt Weave resources in Clickhouse" + key_usage = "ENCRYPT_DECRYPT" + + policy = var.clickhouse_key_policy != "" ? 
var.clickhouse_key_policy : jsonencode({ + "Version" : "2012-10-17", + "Statement" : [ + { + "Sid" : "Allow administration of the key", + "Effect" : "Allow", + "Principal" : { "AWS" : "${data.aws_caller_identity.current.arn}" }, + "Action" : "kms:*", + "Resource" : "*" + }, + { + "Sid" : "Allow ClickHouse Access", + "Effect" : "Allow", + "Principal" : { + "AWS" : "arn:aws:iam::576599896960:role/prod-kms-request-role" + }, + "Action" : [ + "kms:GetPublicKey", + "kms:Decrypt", + "kms:GenerateDataKeyPair", + "kms:Encrypt", + "kms:GetKeyRotationStatus", + "kms:GenerateDataKey", + "kms:DescribeKey" + ], + "Resource" : "*" + }, + ] + }) + + tags = { + Name = "wandb-kms-clickhouse-key" + } +} + + + +resource "aws_kms_alias" "clickhouse_key" { + name = "alias/${var.clickhouse_key_alias}" + target_key_id = aws_kms_key.clickhouse_key.key_id +} + + +resource "aws_kms_grant" "clickhouse" { + count = var.iam_principal_arn == "" ? 0 : 1 + + grantee_principal = var.iam_principal_arn + key_id = aws_kms_key.clickhouse_key.key_id + operations = [ + "Decrypt", + "DescribeKey", + "Encrypt", + "GenerateDataKey", + "GenerateDataKeyPair", + "GenerateDataKeyPairWithoutPlaintext", + "GenerateDataKeyPairWithoutPlaintext", + "ReEncryptFrom", + "ReEncryptTo", + ] +} diff --git a/modules/kms/outputs.tf b/modules/kms/outputs.tf index 20172f8f..1278a569 100644 --- a/modules/kms/outputs.tf +++ b/modules/kms/outputs.tf @@ -1,4 +1,10 @@ output "key" { value = aws_kms_key.key - description = "The KMS key used to encrypt data." -} \ No newline at end of file + description = "The KMS key used to encrypt Models data." +} + + +output "clickhouse_key" { + value = aws_kms_key.clickhouse_key + description = "The KMS key used to encrypt Weave data in Clickhouse." 
+} diff --git a/modules/kms/variables.tf b/modules/kms/variables.tf index c3dce5b6..91da5cf9 100644 --- a/modules/kms/variables.tf +++ b/modules/kms/variables.tf @@ -18,4 +18,15 @@ variable "key_policy" { description = "The policy that will define the permissions for the kms key." type = string default = "" -} \ No newline at end of file +} + +variable "clickhouse_key_alias" { + description = "The key alias for AWS KMS Customer managed key." + type = string +} + +variable "clickhouse_key_policy" { + description = "The policy that will define the permissions for the kms clickhouse key." + type = string + default = "" +} diff --git a/modules/networking/main.tf b/modules/networking/main.tf index 21382052..6f2a628b 100644 --- a/modules/networking/main.tf +++ b/modules/networking/main.tf @@ -37,3 +37,13 @@ module "vpc" { "kubernetes.io/role/elb" = "1" } } + +resource "aws_vpc_endpoint" "clickhouse" { + count = var.create_vpc && var.clickhouse_endpoint_service_id != "" ? 1 : 0 + + vpc_id = module.vpc.vpc_id + service_name = var.clickhouse_endpoint_service_id + vpc_endpoint_type = "Interface" + subnet_ids = module.vpc.private_subnets + private_dns_enabled = true +} diff --git a/modules/networking/variables.tf b/modules/networking/variables.tf index 9cf1d794..639c35c1 100644 --- a/modules/networking/variables.tf +++ b/modules/networking/variables.tf @@ -61,4 +61,10 @@ variable "amazon_side_asn" { description = "The Autonomous System Number (ASN) for the Amazon side of the gateway. By default the virtual private gateway is created with the current default Amazon ASN." 
type = string default = "64512" -} \ No newline at end of file +} + +variable "clickhouse_endpoint_service_id" { + description = "The ID of the Clickhouse service endpoint" + type = string + default = "" +} diff --git a/outputs.tf b/outputs.tf index 188e49f1..aa7d22cd 100644 --- a/outputs.tf +++ b/outputs.tf @@ -52,6 +52,12 @@ output "kms_key_arn" { description = "The Amazon Resource Name of the KMS key used to encrypt data at rest." } +output "kms_clickhouse_key_arn" { + value = local.clickhouse_kms_key + description = "The Amazon Resource Name of the KMS key used to encrypt Weave data at rest in Clickhouse." + +} + output "network_id" { value = local.network_id description = "The identity of the VPC in which resources are deployed." diff --git a/variables.tf b/variables.tf index fd73db38..ddede192 100644 --- a/variables.tf +++ b/variables.tf @@ -199,6 +199,18 @@ variable "kms_key_policy" { default = "" } +variable "kms_clickhouse_key_alias" { + type = string + description = "KMS key alias for AWS KMS Customer managed key used by Clickhouse CMEK." + default = null +} + +variable "kms_clickhouse_key_policy" { + type = string + description = "The policy that will define the permissions for the clickhouse kms key." + default = "" +} + ########################################## # Network # ########################################## @@ -474,3 +486,9 @@ variable "yace_sa_name" { type = string default = "wandb-yace" } + +variable "clickhouse_endpoint_service_id" { + type = string + description = "The service ID of the VPC endpoint service for Clickhouse." 
+ default = "" +} From 588b1c0975ae8df7912c964f2d17294b2f15e9c6 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 17 Jul 2024 23:06:08 +0000 Subject: [PATCH 52/72] chore(release): version 4.21.0 [skip ci] ## [4.21.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.3...v4.21.0) (2024-07-17) ### Features * Set up KMS key for clickhouse CMEK and endpoint for PL ([#243](https://github.com/wandb/terraform-aws-wandb/issues/243)) ([1d2fb92](https://github.com/wandb/terraform-aws-wandb/commit/1d2fb921792019b6356e0f89b7c117dda168339a)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ff8e3ff..55a41327 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [4.21.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.3...v4.21.0) (2024-07-17) + + +### Features + +* Set up KMS key for clickhouse CMEK and endpoint for PL ([#243](https://github.com/wandb/terraform-aws-wandb/issues/243)) ([1d2fb92](https://github.com/wandb/terraform-aws-wandb/commit/1d2fb921792019b6356e0f89b7c117dda168339a)) + ### [4.20.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.2...v4.20.3) (2024-07-11) From 42d64bae1847a6d26b16bbf46cd341a39389ad0f Mon Sep 17 00:00:00 2001 From: Nick Penaranda Date: Wed, 17 Jul 2024 20:37:18 -0400 Subject: [PATCH 53/72] fix: Don't create KMS key and related resources for CH by default (#244) * fix: Don't create KMS key and related resources for CH by default * fix * guess what, straight to fix --- main.tf | 3 ++- modules/kms/main.tf | 10 +++++++--- modules/kms/outputs.tf | 2 +- modules/kms/variables.tf | 7 +++++++ variables.tf | 8 +++++++- 5 files changed, 24 insertions(+), 6 deletions(-) diff --git a/main.tf b/main.tf index b3043a2e..e45ac467 100644 --- a/main.tf +++ b/main.tf @@ -6,6 +6,7 @@ module "kms" { key_alias = var.kms_key_alias == null ? 
"${var.namespace}-kms-alias" : var.kms_key_alias key_policy = var.kms_key_policy + create_clickhouse_key = var.enable_clickhouse clickhouse_key_alias = var.kms_clickhouse_key_alias == null ? "${var.namespace}-kms-clickhouse-alias" : var.kms_clickhouse_key_alias clickhouse_key_policy = var.kms_clickhouse_key_policy } @@ -13,7 +14,7 @@ module "kms" { locals { default_kms_key = module.kms.key.arn - clickhouse_kms_key = module.kms.clickhouse_key.arn + clickhouse_kms_key = var.enable_clickhouse ? module.kms.clickhouse_key.arn : null s3_kms_key_arn = length(var.bucket_kms_key_arn) > 0 ? var.bucket_kms_key_arn : local.default_kms_key database_kms_key_arn = length(var.database_kms_key_arn) > 0 ? var.database_kms_key_arn : local.default_kms_key database_performance_insights_kms_key_arn = length(var.database_performance_insights_kms_key_arn) > 0 ? var.database_performance_insights_kms_key_arn : local.default_kms_key diff --git a/modules/kms/main.tf b/modules/kms/main.tf index d600ae34..df115510 100644 --- a/modules/kms/main.tf +++ b/modules/kms/main.tf @@ -91,6 +91,8 @@ resource "aws_kms_grant" "main" { } resource "aws_kms_key" "clickhouse_key" { + count = var.create_clickhouse_key ? 1 : 0 + deletion_window_in_days = var.key_deletion_window description = "AWS KMS Customer-managed key to encrypt Weave resources in Clickhouse" key_usage = "ENCRYPT_DECRYPT" @@ -133,16 +135,18 @@ resource "aws_kms_key" "clickhouse_key" { resource "aws_kms_alias" "clickhouse_key" { + count = var.create_clickhouse_key ? 1 : 0 + name = "alias/${var.clickhouse_key_alias}" - target_key_id = aws_kms_key.clickhouse_key.key_id + target_key_id = aws_kms_key.clickhouse_key[0].key_id } resource "aws_kms_grant" "clickhouse" { - count = var.iam_principal_arn == "" ? 0 : 1 + count = !var.create_clickhouse_key && (var.iam_principal_arn == "") ? 
0 : 1 grantee_principal = var.iam_principal_arn - key_id = aws_kms_key.clickhouse_key.key_id + key_id = aws_kms_key.clickhouse_key[0].key_id operations = [ "Decrypt", "DescribeKey", diff --git a/modules/kms/outputs.tf b/modules/kms/outputs.tf index 1278a569..5b09005d 100644 --- a/modules/kms/outputs.tf +++ b/modules/kms/outputs.tf @@ -5,6 +5,6 @@ output "key" { output "clickhouse_key" { - value = aws_kms_key.clickhouse_key + value = var.create_clickhouse_key ? aws_kms_key.clickhouse_key[0] : null description = "The KMS key used to encrypt Weave data in Clickhouse." } diff --git a/modules/kms/variables.tf b/modules/kms/variables.tf index 91da5cf9..bcafbd3f 100644 --- a/modules/kms/variables.tf +++ b/modules/kms/variables.tf @@ -20,9 +20,16 @@ variable "key_policy" { default = "" } +variable "create_clickhouse_key" { + description = "Whether to create a KMS key for Clickhouse CMEK." + type = bool + default = false +} + variable "clickhouse_key_alias" { description = "The key alias for AWS KMS Customer managed key." type = string + default = "wandb-kms-clickhouse-key" } variable "clickhouse_key_policy" { diff --git a/variables.tf b/variables.tf index ddede192..09de8e77 100644 --- a/variables.tf +++ b/variables.tf @@ -487,8 +487,14 @@ variable "yace_sa_name" { default = "wandb-yace" } +variable "enable_clickhouse" { + type = bool + description = "Provision clickhouse resources" + default = false +} + variable "clickhouse_endpoint_service_id" { type = string - description = "The service ID of the VPC endpoint service for Clickhouse." 
+ description = "The service ID of the VPC endpoint service for Clickhouse" default = "" } From dddc121e873e5863c1d36f43676d412c9582ef4a Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 18 Jul 2024 00:37:51 +0000 Subject: [PATCH 54/72] chore(release): version 4.21.1 [skip ci] ### [4.21.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.0...v4.21.1) (2024-07-18) ### Bug Fixes * Don't create KMS key and related resources for CH by default ([#244](https://github.com/wandb/terraform-aws-wandb/issues/244)) ([42d64ba](https://github.com/wandb/terraform-aws-wandb/commit/42d64bae1847a6d26b16bbf46cd341a39389ad0f)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55a41327..8e7d8742 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.21.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.0...v4.21.1) (2024-07-18) + + +### Bug Fixes + +* Don't create KMS key and related resources for CH by default ([#244](https://github.com/wandb/terraform-aws-wandb/issues/244)) ([42d64ba](https://github.com/wandb/terraform-aws-wandb/commit/42d64bae1847a6d26b16bbf46cd341a39389ad0f)) + ## [4.21.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.20.3...v4.21.0) (2024-07-17) From 78d9be7c0b1126aada5e5df7539ae47ecc6b3368 Mon Sep 17 00:00:00 2001 From: Nick Penaranda Date: Thu, 18 Jul 2024 13:26:28 -0400 Subject: [PATCH 55/72] fix: Condition to create kms.aws_kms_grant.clickhouse was incorrect (#245) --- modules/kms/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/kms/main.tf b/modules/kms/main.tf index df115510..4b0f6edd 100644 --- a/modules/kms/main.tf +++ b/modules/kms/main.tf @@ -143,7 +143,7 @@ resource "aws_kms_alias" "clickhouse_key" { resource "aws_kms_grant" "clickhouse" { - count = !var.create_clickhouse_key && (var.iam_principal_arn == "") ? 
0 : 1 + count = var.create_clickhouse_key && (var.iam_principal_arn != "") ? 1 : 0 grantee_principal = var.iam_principal_arn key_id = aws_kms_key.clickhouse_key[0].key_id From fd4d0e11aa0818cb14c3fb309b27b6391d5363b8 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 18 Jul 2024 17:26:56 +0000 Subject: [PATCH 56/72] chore(release): version 4.21.2 [skip ci] ### [4.21.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.1...v4.21.2) (2024-07-18) ### Bug Fixes * Condition to create kms.aws_kms_grant.clickhouse was incorrect ([#245](https://github.com/wandb/terraform-aws-wandb/issues/245)) ([78d9be7](https://github.com/wandb/terraform-aws-wandb/commit/78d9be7c0b1126aada5e5df7539ae47ecc6b3368)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e7d8742..ffe532da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.21.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.1...v4.21.2) (2024-07-18) + + +### Bug Fixes + +* Condition to create kms.aws_kms_grant.clickhouse was incorrect ([#245](https://github.com/wandb/terraform-aws-wandb/issues/245)) ([78d9be7](https://github.com/wandb/terraform-aws-wandb/commit/78d9be7c0b1126aada5e5df7539ae47ecc6b3368)) + ### [4.21.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.0...v4.21.1) (2024-07-18) From 47871c846c13ed93ffa71b68c8177f0d2d99d7cf Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Tue, 23 Jul 2024 12:12:59 -0500 Subject: [PATCH 57/72] fix: YACE scoping (#246) * fix: YACE scoping * fix to follow https://github.com/nerdswords/yet-another-cloudwatch-exporter/blob/master/docs/configuration.md#discovery_job_config --- main.tf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.tf b/main.tf index e45ac467..6ecb7d05 100644 --- a/main.tf +++ b/main.tf @@ -335,6 +335,9 @@ module "wandb" 
{ install = true regions = [data.aws_region.current.name] serviceAccount = { annotations = { "eks.amazonaws.com/role-arn" = module.iam_role[0].role_arn } } + searchTags = { + "Namespace" = var.namespace + } } : { install = false regions = [] From 6d12f7e277547f40f575fe9713c8d63e2975cbd1 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Tue, 23 Jul 2024 17:13:30 +0000 Subject: [PATCH 58/72] chore(release): version 4.21.3 [skip ci] ### [4.21.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.2...v4.21.3) (2024-07-23) ### Bug Fixes * YACE scoping ([#246](https://github.com/wandb/terraform-aws-wandb/issues/246)) ([47871c8](https://github.com/wandb/terraform-aws-wandb/commit/47871c846c13ed93ffa71b68c8177f0d2d99d7cf)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffe532da..f6dfadfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.21.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.2...v4.21.3) (2024-07-23) + + +### Bug Fixes + +* YACE scoping ([#246](https://github.com/wandb/terraform-aws-wandb/issues/246)) ([47871c8](https://github.com/wandb/terraform-aws-wandb/commit/47871c846c13ed93ffa71b68c8177f0d2d99d7cf)) + ### [4.21.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.1...v4.21.2) (2024-07-18) From cf419bdd8d1a3c3996738bbfe8b292579db59d2f Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Tue, 23 Jul 2024 12:21:53 -0500 Subject: [PATCH 59/72] fix: Yace Yace Yace (#247) --- main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.tf b/main.tf index 6ecb7d05..74aa681b 100644 --- a/main.tf +++ b/main.tf @@ -342,6 +342,7 @@ module "wandb" { install = false regions = [] serviceAccount = {} + searchTags = {} } otel = { From 798d2f26f16e28aea33fb5f80878b5f56d7dc1a0 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: 
Tue, 23 Jul 2024 17:22:45 +0000 Subject: [PATCH 60/72] chore(release): version 4.21.4 [skip ci] ### [4.21.4](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.3...v4.21.4) (2024-07-23) ### Bug Fixes * Yace Yace Yace ([#247](https://github.com/wandb/terraform-aws-wandb/issues/247)) ([cf419bd](https://github.com/wandb/terraform-aws-wandb/commit/cf419bdd8d1a3c3996738bbfe8b292579db59d2f)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6dfadfd..5eecea78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +### [4.21.4](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.3...v4.21.4) (2024-07-23) + + +### Bug Fixes + +* Yace Yace Yace ([#247](https://github.com/wandb/terraform-aws-wandb/issues/247)) ([cf419bd](https://github.com/wandb/terraform-aws-wandb/commit/cf419bdd8d1a3c3996738bbfe8b292579db59d2f)) + ### [4.21.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.2...v4.21.3) (2024-07-23) From 48131b79219071b0a1311bbb5bc468a62c51e266 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Wed, 24 Jul 2024 11:17:04 -0500 Subject: [PATCH 61/72] fix: Use bucket KMS key arn if provided for W&B managed bucket, always use that key even if empty for customer provided buckets (#248) --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index 74aa681b..45f04812 100644 --- a/main.tf +++ b/main.tf @@ -15,10 +15,10 @@ locals { default_kms_key = module.kms.key.arn clickhouse_kms_key = var.enable_clickhouse ? module.kms.clickhouse_key.arn : null - s3_kms_key_arn = length(var.bucket_kms_key_arn) > 0 ? var.bucket_kms_key_arn : local.default_kms_key database_kms_key_arn = length(var.database_kms_key_arn) > 0 ? 
var.database_kms_key_arn : local.default_kms_key database_performance_insights_kms_key_arn = length(var.database_performance_insights_kms_key_arn) > 0 ? var.database_performance_insights_kms_key_arn : local.default_kms_key use_external_bucket = var.bucket_name != "" + s3_kms_key_arn = local.use_external_bucket || var.bucket_kms_key_arn != "" ? var.bucket_kms_key_arn : local.default_kms_key use_internal_queue = local.use_external_bucket || var.use_internal_queue } From 00f6ff5d2941ee0c3a0582fc1de35d1cf17704a5 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 24 Jul 2024 16:17:33 +0000 Subject: [PATCH 62/72] chore(release): version 4.21.5 [skip ci] ### [4.21.5](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.4...v4.21.5) (2024-07-24) ### Bug Fixes * Use bucket KMS key arn if provided for W&B managed bucket, always use that key even if empty for customer provided buckets ([#248](https://github.com/wandb/terraform-aws-wandb/issues/248)) ([48131b7](https://github.com/wandb/terraform-aws-wandb/commit/48131b79219071b0a1311bbb5bc468a62c51e266)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eecea78..2d4eb122 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.21.5](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.4...v4.21.5) (2024-07-24) + + +### Bug Fixes + +* Use bucket KMS key arn if provided for W&B managed bucket, always use that key even if empty for customer provided buckets ([#248](https://github.com/wandb/terraform-aws-wandb/issues/248)) ([48131b7](https://github.com/wandb/terraform-aws-wandb/commit/48131b79219071b0a1311bbb5bc468a62c51e266)) + ### [4.21.4](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.3...v4.21.4) (2024-07-23) From d8fa06f89da48443cb9fe0a45f491e5c13bb41cc Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Wed, 24 Jul 2024 12:17:36 -0500 Subject: [PATCH 63/72] fix: Always let the node role have access to the `default_kms_key` (#249) * fix: Use bucket KMS key arn if provided for W&B managed bucket, always use that key even if empty for customer provided buckets * fix: Always let the node role have access to the `default_kms_key` --- main.tf | 7 +++++-- modules/app_eks/iam-policy-docs.tf | 2 +- modules/app_eks/variables.tf | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/main.tf b/main.tf index 45f04812..64d74304 100644 --- a/main.tf +++ b/main.tf @@ -12,7 +12,6 @@ module "kms" { } locals { - default_kms_key = module.kms.key.arn clickhouse_kms_key = var.enable_clickhouse ? module.kms.clickhouse_key.arn : null database_kms_key_arn = length(var.database_kms_key_arn) > 0 ? var.database_kms_key_arn : local.default_kms_key @@ -141,7 +140,11 @@ module "app_eks" { map_roles = var.kubernetes_map_roles map_users = var.kubernetes_map_users - bucket_kms_key_arn = local.s3_kms_key_arn + bucket_kms_key_arns = compact([ + local.default_kms_key, + var.bucket_kms_key_arn != "" && var.bucket_kms_key_arn != null ? var.bucket_kms_key_arn : null + ]) + bucket_arn = data.aws_s3_bucket.file_storage.arn bucket_sqs_queue_arn = local.use_internal_queue ? 
null : data.aws_sqs_queue.file_storage.0.arn diff --git a/modules/app_eks/iam-policy-docs.tf b/modules/app_eks/iam-policy-docs.tf index 4e7f27b4..83b6aa1e 100644 --- a/modules/app_eks/iam-policy-docs.tf +++ b/modules/app_eks/iam-policy-docs.tf @@ -35,7 +35,7 @@ data "aws_iam_policy_document" "node_kms" { "kms:DescribeKey" ] effect = "Allow" - resources = var.bucket_kms_key_arn == "" || var.bucket_kms_key_arn == null ? ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${aws_iam_role.node.name}"] : [var.bucket_kms_key_arn] + resources = var.bucket_kms_key_arns } } diff --git a/modules/app_eks/variables.tf b/modules/app_eks/variables.tf index 64e6df6e..ff2d4ce6 100644 --- a/modules/app_eks/variables.tf +++ b/modules/app_eks/variables.tf @@ -3,9 +3,9 @@ variable "bucket_arn" { nullable = false } -variable "bucket_kms_key_arn" { +variable "bucket_kms_key_arns" { description = "The Amazon Resource Name of the KMS key with which S3 storage bucket objects will be encrypted." - type = string + type = list(string) } variable "fqdn" { From 9626c4819054439fbfebd9ceb34ca7045f3d36a5 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 24 Jul 2024 17:18:03 +0000 Subject: [PATCH 64/72] chore(release): version 4.21.6 [skip ci] ### [4.21.6](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.5...v4.21.6) (2024-07-24) ### Bug Fixes * Always let the node role have access to the `default_kms_key` ([#249](https://github.com/wandb/terraform-aws-wandb/issues/249)) ([d8fa06f](https://github.com/wandb/terraform-aws-wandb/commit/d8fa06f89da48443cb9fe0a45f491e5c13bb41cc)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d4eb122..3cff72cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.21.6](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.5...v4.21.6) (2024-07-24) + + +### Bug Fixes + +* Always let the node role have access to the `default_kms_key` ([#249](https://github.com/wandb/terraform-aws-wandb/issues/249)) ([d8fa06f](https://github.com/wandb/terraform-aws-wandb/commit/d8fa06f89da48443cb9fe0a45f491e5c13bb41cc)) + ### [4.21.5](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.4...v4.21.5) (2024-07-24) From 1c608185dc6dd68d560d7715060a65fc8719c895 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Wed, 31 Jul 2024 15:05:10 -0700 Subject: [PATCH 65/72] feat: Bump operator image and chart versions (#250) --- main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.tf b/main.tf index 64d74304..35a87071 100644 --- a/main.tf +++ b/main.tf @@ -264,8 +264,8 @@ module "wandb" { module.app_eks, module.redis, ] - operator_chart_version = "1.1.2" - controller_image_tag = "1.10.1" + controller_image_tag = "1.12.0" + operator_chart_version = "1.2.0" spec = { values = { From a511ae6475a4aea4495e84dee531dd81e6073e36 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Wed, 31 Jul 2024 22:05:37 +0000 Subject: [PATCH 66/72] chore(release): version 4.22.0 [skip ci] ## [4.22.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.6...v4.22.0) (2024-07-31) ### Features * Bump operator image and chart versions ([#250](https://github.com/wandb/terraform-aws-wandb/issues/250)) ([1c60818](https://github.com/wandb/terraform-aws-wandb/commit/1c608185dc6dd68d560d7715060a65fc8719c895)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cff72cb..91d3e697 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [4.22.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.6...v4.22.0) (2024-07-31) + + +### Features + +* Bump operator image and chart versions ([#250](https://github.com/wandb/terraform-aws-wandb/issues/250)) ([1c60818](https://github.com/wandb/terraform-aws-wandb/commit/1c608185dc6dd68d560d7715060a65fc8719c895)) + ### [4.21.6](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.5...v4.21.6) (2024-07-24) From e8987f29fdde5f9164a1c0133ea2d6e672a1fa5d Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Wed, 31 Jul 2024 17:15:49 -0700 Subject: [PATCH 67/72] fix: Bump operator chart versions (#252) --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index 35a87071..8718b0e3 100644 --- a/main.tf +++ b/main.tf @@ -265,7 +265,7 @@ module "wandb" { module.redis, ] controller_image_tag = "1.12.0" - operator_chart_version = "1.2.0" + operator_chart_version = "1.2.1" spec = { values = { From e7c7cecd2fed715c2b480aeb9795f37d321afc9e Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 1 Aug 2024 00:16:15 +0000 Subject: [PATCH 68/72] chore(release): version 4.22.1 [skip ci] ### [4.22.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.0...v4.22.1) (2024-08-01) ### Bug Fixes * Bump operator chart versions ([#252](https://github.com/wandb/terraform-aws-wandb/issues/252)) ([e8987f2](https://github.com/wandb/terraform-aws-wandb/commit/e8987f29fdde5f9164a1c0133ea2d6e672a1fa5d)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91d3e697..3357ae62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.22.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.0...v4.22.1) (2024-08-01) + + +### Bug Fixes + +* Bump operator chart versions ([#252](https://github.com/wandb/terraform-aws-wandb/issues/252)) ([e8987f2](https://github.com/wandb/terraform-aws-wandb/commit/e8987f29fdde5f9164a1c0133ea2d6e672a1fa5d)) + ## [4.22.0](https://github.com/wandb/terraform-aws-wandb/compare/v4.21.6...v4.22.0) (2024-07-31) From b46d6d2fbac25af8374bb3121b410f2d78935bfc Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+zacharyblasczyk@users.noreply.github.com> Date: Wed, 31 Jul 2024 21:10:07 -0500 Subject: [PATCH 69/72] fix: Test TF Pipeline (#253) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4a06731d..c379e37d 100644 --- a/README.md +++ b/README.md @@ -288,7 +288,7 @@ bucket_kms_key_arn ### In order to allow cross account KMS keys. we need to allow kms keys to be accessed by WandB account. -this can be donw by adding the following policy document. +This can be done by adding the following policy document. ``` { From 83dcb02bf8067aedded453dee70063c10beee76c Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 1 Aug 2024 02:14:24 +0000 Subject: [PATCH 70/72] chore(release): version 4.22.2 [skip ci] ### [4.22.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.1...v4.22.2) (2024-08-01) ### Bug Fixes * Test TF Pipeline ([#253](https://github.com/wandb/terraform-aws-wandb/issues/253)) ([b46d6d2](https://github.com/wandb/terraform-aws-wandb/commit/b46d6d2fbac25af8374bb3121b410f2d78935bfc)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3357ae62..80d28d46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.22.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.1...v4.22.2) (2024-08-01) + + +### Bug Fixes + +* Test TF Pipeline ([#253](https://github.com/wandb/terraform-aws-wandb/issues/253)) ([b46d6d2](https://github.com/wandb/terraform-aws-wandb/commit/b46d6d2fbac25af8374bb3121b410f2d78935bfc)) + ### [4.22.1](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.0...v4.22.1) (2024-08-01) From 9932e331a58ee0be67e90f7ffb6391f28c17eac1 Mon Sep 17 00:00:00 2001 From: Daniel Panzella Date: Thu, 1 Aug 2024 09:04:19 -0700 Subject: [PATCH 71/72] fix: Bump operator chart versions (#254) * fix: Bump operator chart versions --- main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.tf b/main.tf index 8718b0e3..0e5f86ab 100644 --- a/main.tf +++ b/main.tf @@ -265,7 +265,7 @@ module "wandb" { module.redis, ] controller_image_tag = "1.12.0" - operator_chart_version = "1.2.1" + operator_chart_version = "1.2.4" spec = { values = { From 2fb76be50b1377aad43a26dd401fae7cd33504eb Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 1 Aug 2024 16:04:46 +0000 Subject: [PATCH 72/72] chore(release): version 4.22.3 [skip ci] ### [4.22.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.2...v4.22.3) (2024-08-01) ### Bug Fixes * Bump operator chart versions ([#254](https://github.com/wandb/terraform-aws-wandb/issues/254)) ([9932e33](https://github.com/wandb/terraform-aws-wandb/commit/9932e331a58ee0be67e90f7ffb6391f28c17eac1)) --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80d28d46..0ffa0b76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+### [4.22.3](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.2...v4.22.3) (2024-08-01) + + +### Bug Fixes + +* Bump operator chart versions ([#254](https://github.com/wandb/terraform-aws-wandb/issues/254)) ([9932e33](https://github.com/wandb/terraform-aws-wandb/commit/9932e331a58ee0be67e90f7ffb6391f28c17eac1)) + ### [4.22.2](https://github.com/wandb/terraform-aws-wandb/compare/v4.22.1...v4.22.2) (2024-08-01)