diff --git a/examples/aks/aks_cluster_data/README.MD b/examples/aks/aks_cluster_data/README.MD
new file mode 100644
index 00000000..cfa54aac
--- /dev/null
+++ b/examples/aks/aks_cluster_data/README.MD
@@ -0,0 +1,23 @@
+# AKS and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations
+The following example shows how to onboard an existing AKS cluster to CAST AI, configure [Autoscaler policies](https://docs.cast.ai/reference/policiesapi_upsertclusterpolicies) and additional [Node Configurations](https://docs.cast.ai/docs/node-configuration/).
+
+Example configuration should be analysed in the following order:
+1. Use terraform datasource to fetch the subnet of the existing virtual network - `vnet.tf`
+2. Use terraform datasource to fetch the existing AKS cluster - `aks.tf`
+3. Create CAST AI related resources to connect AKS cluster to CAST AI, configure Autoscaler and Node Configurations - `castai.tf`
+
+# Usage
+1. Rename `tf.vars.example` to `tf.vars`
+2. Update `tf.vars` file with your cluster name, cluster region, virtual network details (subnet, vnet and resource group names) and CAST AI API token.
+3. Initialize Terraform. Under example root folder run:
+```
+terraform init
+```
+4. Run Terraform apply:
+```
+terraform apply -var-file=tf.vars
+```
+5. To destroy resources created by this example:
+```
+terraform destroy -var-file=tf.vars
+```
diff --git a/examples/aks/aks_cluster_data/aks.tf b/examples/aks/aks_cluster_data/aks.tf
new file mode 100644
index 00000000..7e0eae4d
--- /dev/null
+++ b/examples/aks/aks_cluster_data/aks.tf
@@ -0,0 +1,5 @@
+# Use existing AKS cluster.
+data "azurerm_kubernetes_cluster" "this" {
+  name                = var.cluster_name
+  resource_group_name = var.cluster_resource_group_name
+}
diff --git a/examples/aks/aks_cluster_data/castai.tf b/examples/aks/aks_cluster_data/castai.tf
new file mode 100644
index 00000000..364bb5b5
--- /dev/null
+++ b/examples/aks/aks_cluster_data/castai.tf
@@ -0,0 +1,117 @@
+# 3. Connect AKS cluster to CAST AI in READ-ONLY mode.
+
+# Configure Data sources and providers required for CAST AI connection. 
+data "azurerm_subscription" "current" {} + +provider "castai" { + api_url = var.castai_api_url + api_token = var.castai_api_token +} + +provider "helm" { + kubernetes { + host = data.azurerm_kubernetes_cluster.this.kube_config.0.host + client_certificate = base64decode(data.azurerm_kubernetes_cluster.this.kube_config.0.client_certificate) + client_key = base64decode(data.azurerm_kubernetes_cluster.this.kube_config.0.client_key) + cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.this.kube_config.0.cluster_ca_certificate) + } +} + +# Configure AKS cluster connection to CAST AI using CAST AI aks-cluster module. +module "castai-aks-cluster" { + source = "castai/aks/castai" + + api_url = var.castai_api_url + + aks_cluster_name = var.cluster_name + aks_cluster_region = var.cluster_region + node_resource_group = data.azurerm_kubernetes_cluster.this.node_resource_group + resource_group = data.azurerm_kubernetes_cluster.this.resource_group_name + + delete_nodes_on_disconnect = var.delete_nodes_on_disconnect + + subscription_id = data.azurerm_subscription.current.subscription_id + tenant_id = data.azurerm_subscription.current.tenant_id + + default_node_configuration = module.castai-aks-cluster.castai_node_configurations["default"] + + node_configurations = { + default = { + disk_cpu_ratio = 25 + subnets = [data.azurerm_subnet.internal.id] + tags = var.tags + } + + test_node_config = { + disk_cpu_ratio = 25 + subnets = [data.azurerm_subnet.internal.id] + tags = var.tags + max_pods_per_node = 40 + } + } + + node_templates = { + spot_template = { + configuration_id = module.castai-aks-cluster.castai_node_configurations["default"] + should_taint = true + + + constraints = { + fallback_restore_rate_seconds = 1800 + spot = true + use_spot_fallbacks = true + min_cpu = 4 + max_cpu = 100 + instance_families = { + exclude = ["standard_DPLSv5"] + } + compute_optimized = false + storage_optimized = false + } + } + } + + // Configure Autoscaler policies as per API 
specification https://api.cast.ai/v1/spec/#/PoliciesAPI/PoliciesAPIUpsertClusterPolicies.
+  // Here:
+  //  - unschedulablePods - Unscheduled pods policy
+  //  - spotInstances - Spot instances configuration
+  //  - nodeDownscaler - Node deletion policy
+  autoscaler_policies_json = <<-EOT
+    {
+      "enabled": true,
+      "unschedulablePods": {
+        "enabled": true
+      },
+      "spotInstances": {
+        "enabled": true,
+        "clouds": ["azure"],
+        "spotBackups": {
+          "enabled": true
+        },
+        "spotDiversityEnabled": false
+      },
+      "nodeDownscaler": {
+        "enabled": true,
+        "emptyNodes": {
+          "enabled": true
+        },
+        "evictor": {
+          "aggressiveMode": false,
+          "cycleInterval": "5m10s",
+          "dryRun": false,
+          "enabled": true,
+          "nodeGracePeriodMinutes": 10,
+          "scopedMode": false
+        }
+      },
+      "clusterLimits": {
+        "cpu": {
+          "maxCores": 20,
+          "minCores": 1
+        },
+        "enabled": true
+      }
+    }
+  EOT
+
+}
diff --git a/examples/aks/aks_cluster_data/providers.tf b/examples/aks/aks_cluster_data/providers.tf
new file mode 100644
index 00000000..bdab1922
--- /dev/null
+++ b/examples/aks/aks_cluster_data/providers.tf
@@ -0,0 +1,8 @@
+# Following providers required by AKS and Vnet resources.
+provider "azurerm" {
+  features {}
+}
+
+provider "azuread" {
+  tenant_id = data.azurerm_subscription.current.tenant_id
+}
diff --git a/examples/aks/aks_cluster_data/tf.vars.example b/examples/aks/aks_cluster_data/tf.vars.example
new file mode 100644
index 00000000..110588b0
--- /dev/null
+++ b/examples/aks/aks_cluster_data/tf.vars.example
@@ -0,0 +1,8 @@
+cluster_name = ""
+cluster_region = ""
+castai_api_token = ""
+
+subnet_name = ""
+vnet_name = ""
+subnet_resource_group_name = ""
+cluster_resource_group_name = ""
diff --git a/examples/aks/aks_cluster_data/variables.tf b/examples/aks/aks_cluster_data/variables.tf
new file mode 100644
index 00000000..db8d3a59
--- /dev/null
+++ b/examples/aks/aks_cluster_data/variables.tf
@@ -0,0 +1,54 @@
+# AKS cluster variables. 
+variable "cluster_name" {
+  type        = string
+  description = "Name of the AKS cluster, resources will be created for."
+}
+
+variable "cluster_region" {
+  type        = string
+  description = "Region of the AKS cluster, resources will be created for."
+}
+
+variable "castai_api_url" {
+  type        = string
+  description = "URL of alternative CAST AI API to be used during development or testing"
+  default     = "https://api.cast.ai"
+}
+
+# Variables required for connecting AKS cluster to CAST AI
+variable "castai_api_token" {
+  type        = string
+  description = "CAST AI API token created in console.cast.ai API Access keys section"
+}
+
+variable "delete_nodes_on_disconnect" {
+  type        = bool
+  description = "Optional parameter, if set to true - CAST AI provisioned nodes will be deleted from cloud on cluster disconnection. For production use it is recommended to set it to false."
+  default     = true
+}
+
+variable "tags" {
+  type        = map(any)
+  description = "Optional tags for new cluster nodes. This parameter applies only to new nodes - tags for old nodes are not reconciled." 
+ default = {} +} + +variable "cluster_resource_group_name" { + type = string + description = "Name of resource group in which cluster was created" +} + +variable "subnet_name" { + type = string + description = "Name of subnet used for provisioning CAST AI nodes" +} + +variable "vnet_name" { + type = string + description = "Name of virtual network used for provisioning CAST AI nodes" +} + +variable "subnet_resource_group_name" { + type = string + description = "Name of resource group in which vnet was created" +} \ No newline at end of file diff --git a/examples/aks/aks_cluster_data/versions.tf b/examples/aks/aks_cluster_data/versions.tf new file mode 100644 index 00000000..5106c2c1 --- /dev/null +++ b/examples/aks/aks_cluster_data/versions.tf @@ -0,0 +1,14 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + } + azuread = { + source = "hashicorp/azuread" + } + castai = { + source = "castai/castai" + } + } + required_version = ">= 0.13" +} diff --git a/examples/aks/aks_cluster_data/vnet.tf b/examples/aks/aks_cluster_data/vnet.tf new file mode 100644 index 00000000..6f3b7d22 --- /dev/null +++ b/examples/aks/aks_cluster_data/vnet.tf @@ -0,0 +1,7 @@ +# 1. read virtual network and resource group for the cluster. + +data "azurerm_subnet" "internal" { + name = var.subnet_name + virtual_network_name = var.vnet_name + resource_group_name = var.subnet_resource_group_name +} \ No newline at end of file