Skip to content

Commit

Permalink
Add access entry handling & restructure folder (#382)
Browse files Browse the repository at this point in the history
  • Loading branch information
sarvesh-cast authored Sep 25, 2024
1 parent 3d9340f commit f1d8eca
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 24 deletions.
2 changes: 1 addition & 1 deletion docs/resources/autoscaler.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions examples/eks/eks_cluster_existing/README.MD
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## Existing EKS cluster and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations for
## Existing EKS cluster and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations

Following example shows how to onboard existing EKS cluster to CAST AI, configure [Autoscaler policies](https://docs.cast.ai/reference/policiesapi_upsertclusterpolicies) and additional [Node Configurations](https://docs.cast.ai/docs/node-configuration/).

Expand Down Expand Up @@ -36,7 +36,9 @@ terraform destroy -var-file=tf.vars

> **Note**
>
> If you are onboarding existing cluster to CAST AI you need to also update [aws-auth](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) configmap. In the configmap instance profile
> If you are onboarding an existing cluster to CAST AI:
>
> 1. If your EKS cluster authentication mode is `API` or `API_AND_CONFIG_MAP` — Terraform will add the CAST AI role to the IAM access entries (no user action needed).
> 2. If your EKS cluster authentication mode is `CONFIG_MAP` — you also need to update the [aws-auth](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) configmap. The instance profile used by CAST AI has to be present in the configmap. An example entry can be found [here](https://github.com/castai/terraform-provider-castai/blob/157babd57b0977f499eb162e9bee27bee51d292a/examples/eks/eks_cluster_autoscaler_polices/eks.tf#L28-L38).

Expand Down
84 changes: 64 additions & 20 deletions examples/eks/eks_cluster_existing/castai.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,15 @@ resource "castai_eks_user_arn" "castai_user_arn" {
cluster_id = castai_eks_clusterid.cluster_id.id
}


provider "castai" {
api_url = var.castai_api_url
api_token = var.castai_api_token
locals {
# True when the cluster's authentication mode contains "API"
# (i.e. "API" or "API_AND_CONFIG_MAP"), meaning EKS access entries are
# supported and the aws_eks_access_entry resource below can be used
# instead of editing the aws-auth configmap manually.
access_entry = can(regex("API", data.aws_eks_cluster.existing_cluster.access_config[0].authentication_mode))
}

provider "helm" {
kubernetes {
host = data.aws_eks_cluster.existing_cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.existing_cluster.certificate_authority.0.data)
exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed.
args = ["eks", "get-token", "--cluster-name", var.cluster_name, "--region", var.cluster_region, "--profile", var.profile]
}
}
# Grants the CAST AI instance profile role access to the cluster via an
# EKS access entry. Created only when the cluster's authentication mode
# supports access entries (see local.access_entry); for CONFIG_MAP-only
# clusters the aws-auth configmap must be updated manually instead.
resource "aws_eks_access_entry" "access_entry" {
count = local.access_entry ? 1 : 0
cluster_name = var.cluster_name
principal_arn = module.castai-eks-role-iam.instance_profile_role_arn
# EC2_LINUX entry type: nodes joining via this role get standard
# Linux worker-node permissions.
type = "EC2_LINUX"
}

# Create AWS IAM policies and a user to connect to CAST AI.
Expand Down Expand Up @@ -65,7 +57,6 @@ module "castai-eks-cluster" {
delete_nodes_on_disconnect = var.delete_nodes_on_disconnect

default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"]

node_configurations = {
default = {
subnets = var.subnets
Expand Down Expand Up @@ -99,8 +90,58 @@ module "castai-eks-cluster" {
spot_interruption_predictions_type = "aws-rebalance-recommendations"
}
}
}
example_spot_template = {
configuration_id = module.castai-eks-cluster.castai_node_configurations["default"]
is_enabled = true
should_taint = true

custom_labels = {
custom-label-key-1 = "custom-label-value-1",
custom-label-key-2 = "custom-label-value-2"
}

custom_taints = [
{
key = "custom-taint-key-1"
value = "custom-taint-value-1"
},
{
key = "custom-taint-key-2"
value = "custom-taint-value-2"
}
]

constraints = {
fallback_restore_rate_seconds = 1800
spot = true
use_spot_fallbacks = true
min_cpu = 4
max_cpu = 100
instance_families = {
exclude = ["m5"]
}
is_gpu_only = false

# Optional: define custom priority for instances selection.
#
# 1. Prioritize C5a and C5ad spot instances above all else, regardless of price.
# 2. If C5a is not available, try C6a family.
custom_priority = [
{
instance_families = ["c5a", "c5ad"]
spot = true
},
{
instance_families = ["c6a"]
spot = true
}
# 3. instances not matching any of custom priority groups will be tried after
# nothing matches from priority groups.
]
}
}
}
# Autoscaling & evictor setting
autoscaler_settings = {
enabled = false
is_scoped_mode = false
Expand All @@ -118,10 +159,9 @@ module "castai-eks-cluster" {
}

evictor = {
aggressive_mode = false
cycle_interval = "5m10s"
dry_run = false
enabled = false
aggressive_mode = false
cycle_interval = "60s"
node_grace_period_minutes = 10
scoped_mode = false
}
Expand All @@ -141,6 +181,10 @@ module "castai-eks-cluster" {
}
}
}
# Installs Workload autoscaler
install_workload_autoscaler = true
# Installs network monitor
install_egressd = true

# depends_on helps Terraform with creating proper dependencies graph in case of resource creation and in this case destroy.
# module "castai-eks-cluster" has to be destroyed before module "castai-eks-role-iam".
Expand Down
3 changes: 3 additions & 0 deletions examples/eks/eks_cluster_existing/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Surfaces the cluster's authentication mode so users can verify whether
# CAST AI role access is granted via IAM access entries (API /
# API_AND_CONFIG_MAP) or requires a manual aws-auth configmap update
# (CONFIG_MAP).
output "eks_cluster_authentication_mode" {
  description = "Authentication mode of the existing EKS cluster (CONFIG_MAP, API, or API_AND_CONFIG_MAP)."
  value       = data.aws_eks_cluster.existing_cluster.access_config[0].authentication_mode
}
17 changes: 17 additions & 0 deletions examples/eks/eks_cluster_existing/providers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ provider "aws" {
profile = var.profile
}

# CAST AI provider configuration. API endpoint and token are supplied
# via input variables (see variables.tf); the token authenticates all
# CAST AI API calls made by this module.
provider "castai" {
api_url = var.castai_api_url
api_token = var.castai_api_token
}

provider "kubernetes" {
host = data.aws_eks_cluster.existing_cluster.endpoint
Expand All @@ -15,3 +19,16 @@ provider "kubernetes" {
args = ["eks", "get-token", "--cluster-name", var.cluster_name, "--region", var.cluster_region, "--profile", var.profile]
}
}

# Helm provider authenticated against the existing EKS cluster.
# Cluster endpoint and CA come from the aws_eks_cluster data source;
# a short-lived token is generated on demand via the AWS CLI.
provider "helm" {
  kubernetes {
    host = data.aws_eks_cluster.existing_cluster.endpoint
    # Use modern index syntax [0] instead of the legacy ".0" attribute
    # access form, which is deprecated in current Terraform versions.
    cluster_ca_certificate = base64decode(data.aws_eks_cluster.existing_cluster.certificate_authority[0].data)
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command = "aws"
      # This requires the awscli to be installed locally where Terraform is executed.
      args = ["eks", "get-token", "--cluster-name", var.cluster_name, "--region", var.cluster_region, "--profile", var.profile]
    }
  }
}
2 changes: 1 addition & 1 deletion examples/resources/castai_autoscaler/resource.tf
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ resource "castai_autoscaler" "castai_autoscaler_policy" {
dry_run = false
aggressive_mode = false
scoped_mode = false
cycle_interval = "300s"
cycle_interval = "60s"
node_grace_period_minutes = 10
pod_eviction_failure_back_off_interval = "30s"
ignore_pod_disruption_budgets = false
Expand Down

0 comments on commit f1d8eca

Please sign in to comment.