Skip to content

Commit

Permalink
chore: add example of onboarding AKS cluster using existing cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
aldor007 committed Jul 18, 2023
1 parent 91badab commit 06997b3
Show file tree
Hide file tree
Showing 8 changed files with 236 additions and 0 deletions.
23 changes: 23 additions & 0 deletions examples/aks/aks_cluster_data/README.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# AKS and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations
The following example shows how to onboard an existing AKS cluster to CAST AI and configure [Autoscaler policies](https://docs.cast.ai/reference/policiesapi_upsertclusterpolicies) and additional [Node Configurations](https://docs.cast.ai/docs/node-configuration/).

Example configuration should be analysed in the following order:
1. Use a Terraform data source to fetch the subnet of the existing virtual network - `vnet.tf`
2. Use a Terraform data source to fetch the existing AKS cluster - `aks.tf`
3. Create CAST AI related resources to connect AKS cluster to CAST AI, configure Autoscaler and Node Configurations - `castai.tf`

# Usage
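1. Rename `tf.vars.example` to `tf.vars`.
2. Update the `tf.vars` file with your cluster name, cluster region, CAST AI API token, cluster resource group name, subnet name, virtual network name and subnet resource group name.
3. Initialize Terraform. From the example root folder, run: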
```
terraform init
```
4. Run Terraform apply:
```
terraform apply -var-file=tf.vars
```
5. To destroy resources created by this example:
```
terraform destroy -var-file=tf.vars
```
5 changes: 5 additions & 0 deletions examples/aks/aks_cluster_data/aks.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Look up the existing AKS cluster by name and resource group; nothing is created here.
data "azurerm_kubernetes_cluster" "this" {
  resource_group_name = var.cluster_resource_group_name
  name                = var.cluster_name
}
117 changes: 117 additions & 0 deletions examples/aks/aks_cluster_data/castai.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# 3. Connect AKS cluster to CAST AI and configure Autoscaler policies and Node Configurations.

# Configure data sources and providers required for the CAST AI connection.
# The current Azure subscription provides the subscription_id and tenant_id
# referenced by the CAST AI module and the azuread provider.
data "azurerm_subscription" "current" {}

# CAST AI provider: both the API endpoint and the access token come from input variables.
provider "castai" {
  api_token = var.castai_api_token
  api_url   = var.castai_api_url
}

# Helm provider talks to the existing AKS cluster using the first kube_config
# entry of the azurerm_kubernetes_cluster data source; the certificate and key
# fields are base64-decoded before being handed to the provider.
provider "helm" {
  kubernetes {
    host                   = data.azurerm_kubernetes_cluster.this.kube_config[0].host
    cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.this.kube_config[0].cluster_ca_certificate)
    client_certificate     = base64decode(data.azurerm_kubernetes_cluster.this.kube_config[0].client_certificate)
    client_key             = base64decode(data.azurerm_kubernetes_cluster.this.kube_config[0].client_key)
  }
}

# Onboard the existing AKS cluster to CAST AI through the CAST AI AKS module.
module "castai-aks-cluster" {
  source = "castai/aks/castai"

  api_url = var.castai_api_url

  # Cluster identity: name and region come from variables, both resource
  # groups are read from the AKS data source.
  aks_cluster_name    = var.cluster_name
  aks_cluster_region  = var.cluster_region
  resource_group      = data.azurerm_kubernetes_cluster.this.resource_group_name
  node_resource_group = data.azurerm_kubernetes_cluster.this.node_resource_group

  # Subscription and tenant are read from the azurerm_subscription data source.
  tenant_id       = data.azurerm_subscription.current.tenant_id
  subscription_id = data.azurerm_subscription.current.subscription_id

  delete_nodes_on_disconnect = var.delete_nodes_on_disconnect

  # References this module's own castai_node_configurations output for the
  # "default" key (presumably the entry declared in node_configurations below).
  default_node_configuration = module.castai-aks-cluster.castai_node_configurations["default"]

  # Both node configurations place nodes in the subnet fetched in vnet.tf.
  node_configurations = {
    default = {
      subnets        = [data.azurerm_subnet.internal.id]
      tags           = var.tags
      disk_cpu_ratio = 25
    }

    test_node_config = {
      subnets           = [data.azurerm_subnet.internal.id]
      tags              = var.tags
      disk_cpu_ratio    = 25
      max_pods_per_node = 40
    }
  }

  node_templates = {
    # Tainted spot-instance template built on the "default" node configuration.
    spot_template = {
      configuration_id = module.castai-aks-cluster.castai_node_configurations["default"]
      should_taint     = true

      constraints = {
        spot                          = true
        use_spot_fallbacks            = true
        fallback_restore_rate_seconds = 1800

        min_cpu = 4
        max_cpu = 100

        compute_optimized = false
        storage_optimized = false

        instance_families = {
          exclude = ["standard_DPLSv5"]
        }
      }
    }
  }

  # Autoscaler policies, following the API specification at
  # https://api.cast.ai/v1/spec/#/PoliciesAPI/PoliciesAPIUpsertClusterPolicies.
  # Sections:
  #  - unschedulablePods - Unscheduled pods policy
  #  - spotInstances     - Spot instances configuration
  #  - nodeDownscaler    - Node deletion policy
  #  - clusterLimits     - Cluster-wide CPU core limits
  # The heredoc lines share one indentation level on purpose: "<<-" strips the
  # common leading whitespace, so the resulting JSON string has no indentation.
  autoscaler_policies_json = <<-EOT
    {
    "enabled": true,
    "unschedulablePods": {
    "enabled": true
    },
    "spotInstances": {
    "enabled": true,
    "clouds": ["azure"],
    "spotBackups": {
    "enabled": true
    },
    "spotDiversityEnabled": false
    },
    "nodeDownscaler": {
    "enabled": true,
    "emptyNodes": {
    "enabled": true
    },
    "evictor": {
    "aggressiveMode": false,
    "cycleInterval": "5m10s",
    "dryRun": false,
    "enabled": true,
    "nodeGracePeriodMinutes": 10,
    "scopedMode": false
    }
    },
    "clusterLimits": {
    "cpu": {
    "maxCores": 20,
    "minCores": 1
    },
    "enabled": true
    }
    }
  EOT

}
8 changes: 8 additions & 0 deletions examples/aks/aks_cluster_data/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The following providers are required by the AKS, subnet and subscription data sources.
# azurerm is configured with an empty features block.
provider "azurerm" {
  features {}
}

# Azure AD provider, bound to the tenant of the current Azure subscription.
provider "azuread" {
  tenant_id = data.azurerm_subscription.current.tenant_id
}
8 changes: 8 additions & 0 deletions examples/aks/aks_cluster_data/tf.vars.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Existing AKS cluster and CAST AI credentials.
cluster_name                = "<place-holder>"
cluster_region              = "<place-holder>"
cluster_resource_group_name = "<place-holder>"
castai_api_token            = "<place-holder>"

# Existing network in which CAST AI provisions nodes.
subnet_name                = "<place-holder>"
vnet_name                  = "<place-holder>"
subnet_resource_group_name = "<place-holder>"
54 changes: 54 additions & 0 deletions examples/aks/aks_cluster_data/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# AKS cluster variables.

# Name of the existing AKS cluster (no default, must be supplied).
variable "cluster_name" {
  description = "Name of the AKS cluster, resources will be created for."
  type        = string
}

# Region of the existing AKS cluster (no default, must be supplied).
variable "cluster_region" {
  description = "Region of the AKS cluster, resources will be created for."
  type        = string
}

# CAST AI API endpoint; defaults to the public API URL.
variable "castai_api_url" {
  description = "URL of alternative CAST AI API to be used during development or testing"
  type        = string
  default     = "https://api.cast.ai"
}

# Variables required for connecting AKS cluster to CAST AI

# API token passed to the castai provider (no default, must be supplied).
variable "castai_api_token" {
  description = "CAST AI API token created in console.cast.ai API Access keys section"
  type        = string
}

# Forwarded to the CAST AI module; this example defaults it to true.
variable "delete_nodes_on_disconnect" {
  description = "Optional parameter, if set to true - CAST AI provisioned nodes will be deleted from cloud on cluster disconnection. For production use it is recommended to set it to false."
  type        = bool
  default     = true
}

# Tags applied through the node configurations; empty by default.
variable "tags" {
  description = "Optional tags for new cluster nodes. This parameter applies only to new nodes - tags for old nodes are not reconciled."
  type        = map(any)
  default     = {}
}

# Resource group used to look up the existing AKS cluster.
variable "cluster_resource_group_name" {
  description = "Name of resource group in which cluster was created"
  type        = string
}

# Name used to look up the existing subnet.
variable "subnet_name" {
  description = "Name of subnet used for provisioning CAST AI nodes"
  type        = string
}

# Virtual network that contains the subnet being looked up.
variable "vnet_name" {
  description = "Name of virtual network used for provisioning CAST AI nodes"
  type        = string
}

# Resource group used when looking up the subnet.
variable "subnet_resource_group_name" {
  description = "Name of resource group in which vnet was created"
  type        = string
}
14 changes: 14 additions & 0 deletions examples/aks/aks_cluster_data/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Terraform and provider requirements for this example.
terraform {
  required_version = ">= 0.13"

  required_providers {
    azurerm = {
      source = "hashicorp/azurerm"
    }
    azuread = {
      source = "hashicorp/azuread"
    }
    castai = {
      source = "castai/castai"
    }
    # The helm provider is configured in castai.tf, so declare it explicitly.
    # That configuration uses the nested `kubernetes { ... }` block, which is
    # the helm 2.x syntax (3.x expects `kubernetes = { ... }`), hence the upper bound.
    helm = {
      source  = "hashicorp/helm"
      version = "< 3.0.0"
    }
  }
}
7 changes: 7 additions & 0 deletions examples/aks/aks_cluster_data/vnet.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# 1. Read the existing subnet (by name, virtual network and resource group)
#    that the CAST AI node configurations reference.

data "azurerm_subnet" "internal" {
  resource_group_name  = var.subnet_resource_group_name
  virtual_network_name = var.vnet_name
  name                 = var.subnet_name
}

0 comments on commit 06997b3

Please sign in to comment.