Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add example of onboarding AKS cluster using existing cluster #191

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions examples/aks/aks_cluster_data/README.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# AKS and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations
The following example shows how to onboard an existing AKS cluster to CAST AI, configure [Autoscaler policies](https://docs.cast.ai/reference/policiesapi_upsertclusterpolicies) and additional [Node Configurations](https://docs.cast.ai/docs/node-configuration/).

The example configuration should be reviewed in the following order:
1. Use terraform datasource to fetch Virtual network - `vnet.tf`
2. Use terraform datasource to fetch AKS cluster - `aks.tf`
3. Create CAST AI related resources to connect AKS cluster to CAST AI, configure Autoscaler and Node Configurations - `castai.tf`

# Usage
1. Rename `tf.vars.example` to `tf.vars`
2. Update the `tf.vars` file with your cluster name, cluster region, CAST AI API token, cluster resource group name, and the vnet/subnet names and subnet resource group name (these variables have no defaults and must be set).
3. Initialize Terraform. Under example root folder run:
```
terraform init
```
4. Run Terraform apply:
```
terraform apply -var-file=tf.vars
```
5. To destroy resources created by this example:
```
terraform destroy -var-file=tf.vars
```
5 changes: 5 additions & 0 deletions examples/aks/aks_cluster_data/aks.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# 2. Use the existing AKS cluster, looked up by name and resource group.
# Its kube_config, node resource group and resource group name are consumed
# by the helm provider and the castai-aks-cluster module in castai.tf.
data "azurerm_kubernetes_cluster" "this" {
name = var.cluster_name
resource_group_name = var.cluster_resource_group_name
}
117 changes: 117 additions & 0 deletions examples/aks/aks_cluster_data/castai.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# 3. Connect AKS cluster to CAST AI and configure Autoscaler policies and Node Configurations (full onboarding, not read-only).

# Configure Data sources and providers required for CAST AI connection.
# Current subscription supplies subscription_id/tenant_id to the CAST AI module
# and the azuread provider (see providers.tf).
data "azurerm_subscription" "current" {}

# CAST AI provider; the API token is created in the CAST AI console
# (API Access keys section), see variables.tf.
provider "castai" {
api_url = var.castai_api_url
api_token = var.castai_api_token
}

# Helm provider authenticated with the AKS cluster's kubeconfig credentials,
# used by the CAST AI module to install agent components into the cluster.
provider "helm" {
  kubernetes {
    host                   = data.azurerm_kubernetes_cluster.this.kube_config[0].host
    client_certificate     = base64decode(data.azurerm_kubernetes_cluster.this.kube_config[0].client_certificate)
    client_key             = base64decode(data.azurerm_kubernetes_cluster.this.kube_config[0].client_key)
    cluster_ca_certificate = base64decode(data.azurerm_kubernetes_cluster.this.kube_config[0].cluster_ca_certificate)
  }
}

# Configure AKS cluster connection to CAST AI using CAST AI aks-cluster module.
# The module registers the cluster with CAST AI and installs the agent
# components; cluster identifiers are read from the existing-cluster data
# sources rather than hard-coded.
module "castai-aks-cluster" {
source = "castai/aks/castai"

api_url = var.castai_api_url

aks_cluster_name = var.cluster_name
aks_cluster_region = var.cluster_region
# Resource groups come from the existing cluster (aks.tf data source).
node_resource_group = data.azurerm_kubernetes_cluster.this.node_resource_group
resource_group = data.azurerm_kubernetes_cluster.this.resource_group_name

# If true, CAST AI-provisioned nodes are deleted on disconnect (see variables.tf).
delete_nodes_on_disconnect = var.delete_nodes_on_disconnect

subscription_id = data.azurerm_subscription.current.subscription_id
tenant_id = data.azurerm_subscription.current.tenant_id

# Self-reference to the module's own output: marks the "default" entry of
# node_configurations below as the cluster-wide default configuration.
default_node_configuration = module.castai-aks-cluster.castai_node_configurations["default"]

# Two example node configurations; both provision nodes into the subnet
# looked up in vnet.tf.
node_configurations = {
default = {
disk_cpu_ratio = 25
subnets = [data.azurerm_subnet.internal.id]
tags = var.tags
}

test_node_config = {
disk_cpu_ratio = 25
subnets = [data.azurerm_subnet.internal.id]
tags = var.tags
max_pods_per_node = 40
}
}

# Example node template: spot instances with on-demand fallback, constrained
# to 4-100 CPUs and excluding the standard_DPLSv5 instance family.
node_templates = {
spot_template = {
configuration_id = module.castai-aks-cluster.castai_node_configurations["default"]
should_taint = true


constraints = {
fallback_restore_rate_seconds = 1800
spot = true
use_spot_fallbacks = true
min_cpu = 4
max_cpu = 100
instance_families = {
exclude = ["standard_DPLSv5"]
}
compute_optimized = false
storage_optimized = false
}
}
}

// Configure Autoscaler policies as per API specification https://api.cast.ai/v1/spec/#/PoliciesAPI/PoliciesAPIUpsertClusterPolicies.
// Here:
// - unschedulablePods - Unscheduled pods policy
// - spotInstances - Spot instances configuration
// - nodeDownscaler - Node deletion policy
autoscaler_policies_json = <<-EOT
{
"enabled": true,
"unschedulablePods": {
"enabled": true
},
"spotInstances": {
"enabled": true,
"clouds": ["azure"],
"spotBackups": {
"enabled": true
},
"spotDiversityEnabled": false
},
"nodeDownscaler": {
"enabled": true,
"emptyNodes": {
"enabled": true
},
"evictor": {
"aggressiveMode": false,
"cycleInterval": "5m10s",
"dryRun": false,
"enabled": true,
"nodeGracePeriodMinutes": 10,
"scopedMode": false
}
},
"clusterLimits": {
"cpu": {
"maxCores": 20,
"minCores": 1
},
"enabled": true
}
}
EOT

}
8 changes: 8 additions & 0 deletions examples/aks/aks_cluster_data/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Following providers required by AKS and Vnet resources.
provider "azurerm" {
features {}
}

# NOTE(review): data.azurerm_subscription.current is declared in castai.tf;
# this provider relies on it being present in the same configuration.
provider "azuread" {
tenant_id = data.azurerm_subscription.current.tenant_id
}
8 changes: 8 additions & 0 deletions examples/aks/aks_cluster_data/tf.vars.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Example variable values — rename to tf.vars and replace every place-holder.
# (Fixed: vnet_name and subnet_resource_group_name were missing their closing
# quotes, which made the file invalid HCL.)
cluster_name                = "<place-holder>"
cluster_region              = "<place-holder>"
castai_api_token            = "<place-holder>"

subnet_name                 = "<place-holder>"
vnet_name                   = "<place-holder>"
subnet_resource_group_name  = "<place-holder>"
cluster_resource_group_name = "<place-holder>"
54 changes: 54 additions & 0 deletions examples/aks/aks_cluster_data/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# AKS cluster variables.
variable "cluster_name" {
type = string
description = "Name of the AKS cluster, resources will be created for."
}

variable "cluster_region" {
type = string
description = "Region of the AKS cluster, resources will be created for."
}

variable "castai_api_url" {
type = string
description = "URL of alternative CAST AI API to be used during development or testing"
default = "https://api.cast.ai"
}

# Variables required for connecting AKS cluster to CAST AI
variable "castai_api_token" {
type = string
description = "CAST AI API token created in console.cast.ai API Access keys section"
}

variable "delete_nodes_on_disconnect" {
type = bool
description = "Optional parameter, if set to true - CAST AI provisioned nodes will be deleted from cloud on cluster disconnection. For production use it is recommended to set it to false."
default = true
}

variable "tags" {
type = map(any)
description = "Optional tags for new cluster nodes. This parameter applies only to new nodes - tags for old nodes are not reconciled."
default = {}
}

variable "cluster_resource_group_name" {
type = string
description = "Name of resource group in which cluster was created"
}

# Networking variables consumed by the subnet lookup in vnet.tf.
variable "subnet_name" {
type = string
description = "Name of subnet used for provisioning CAST AI nodes"
}

variable "vnet_name" {
type = string
description = "Name of virtual network used for provisioning CAST AI nodes"
}

variable "subnet_resource_group_name" {
type = string
description = "Name of resource group in which vnet was created"
}
14 changes: 14 additions & 0 deletions examples/aks/aks_cluster_data/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Terraform and provider version requirements for this example.
terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
}
azuread = {
source = "hashicorp/azuread"
}
castai = {
source = "castai/castai"
}
}
required_version = ">= 0.13"
}
7 changes: 7 additions & 0 deletions examples/aks/aks_cluster_data/vnet.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# 1. Look up the existing subnet (within the given virtual network and
# resource group) that CAST AI nodes will be provisioned into; its id is
# referenced by the node configurations in castai.tf.
data "azurerm_subnet" "internal" {
name = var.subnet_name
virtual_network_name = var.vnet_name
resource_group_name = var.subnet_resource_group_name
}
Loading