Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat/adds-sophon-infra #18

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ansible/inventory/sophon.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ansible inventory for the sophon OTel collector host.
# NOTE(review): sophon-otel/main.tf writes ansible/inventory/<customer>.ini
# from inventory.tpl, so this file looks like generated output — confirm
# before editing it by hand.
[otel]
otel-01-sophon ansible_host=165.232.84.73

# Variables applied to every host in this inventory.
[all:vars]
ansible_ssh_private_key_file=sophon_key
ansible_user=root
customer=sophon
# Read by the collector role to pick custom.config.yaml over config.yaml.
is_custom_otel=true
13 changes: 13 additions & 0 deletions ansible/roles/collector/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,26 @@
when: not otel_installed.stat.exists

# Render the collector configuration to /etc/otel/config.yaml.
# Exactly one of the two tasks below runs, selected by `is_custom_otel`.
#
# The flag is normalised with `default(false) | bool` because:
#   * inventories that predate custom deployments do not define it at all;
#   * values coming from an INI `[all:vars]` section are strings, so a bare
#     truthiness test would treat "false" as true;
#   * `when` is already a Jinja expression, so it must not contain `{{ }}`.
- name: Generate otel config file
  # TODO: find better way to handle default case.
  # On custom infra no "network" var is needed, but it is needed in the
  # default case.
  when: not (is_custom_otel | default(false) | bool)
  ansible.builtin.template:
    src: config.yaml
    dest: /etc/otel/config.yaml
    mode: "0755"
  notify:
    - Restart otel
    - Restart journald

- name: Generate Custom otel config file
  when: is_custom_otel | default(false) | bool
  ansible.builtin.template:
    src: custom.config.yaml
    dest: /etc/otel/config.yaml
    mode: "0755"
  notify:
    - Restart otel
    - Restart journald

- name: Generate service file
ansible.builtin.template:
Expand Down
12 changes: 10 additions & 2 deletions ansible/roles/collector/templates/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ processors:
match_type: expr
expressions:
- Label("version") == "{{ avail_light_version }}"
# Drops customer-specific metrics from the default pipeline.
# Patterns are anchored with `^` so they only match names that START with
# the customer prefix; an unanchored regexp would also drop any metric that
# merely contains "sophon_" or "sophon/" somewhere in its name.
filter/customers:
  metrics:
    exclude:
      match_type: regexp
      # TODO: Later make it extensible enough so we can keep adding customers
      # w/o hardcoding values.
      metric_names:
        - '^sophon/.*'
        - '^sophon_.*'

# https://github.com/open-telemetry/opentelemetry-collector/blob/v0.83.0/processor/batchprocessor/README.md
batch:
Expand Down Expand Up @@ -56,5 +64,5 @@ service:
# Metrics pipeline for the default deployment.
# `processors` and `exporters` each appeared twice in this mapping; duplicate
# keys are invalid YAML and most parsers silently keep the last one, so only
# the final (effective) values are kept here.
pipelines:
  metrics/datadog:
    receivers: [prometheus, otlp]
    processors: [filter/version, filter/customers]
    exporters: [prometheus, datadog, logging]
59 changes: 59 additions & 0 deletions ansible/roles/collector/templates/custom.config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Metric sources: a Prometheus scrape job and an OTLP gRPC listener.
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: otel
          scrape_interval: 10s
          static_configs:
            - targets:
                - "0.0.0.0:8888"

  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"

# TODO: Fix broad filters.
# Patterns below are anchored with `^` so they match on the metric-name
# prefix only. Unanchored, "avail.*" would exclude any metric whose name
# merely contains "avail" (e.g. "<customer>_available_..."), and the include
# patterns would admit any name containing "<customer>_" anywhere.
processors:
  filter/customers:
    metrics:
      include:
        match_type: regexp
        metric_names:
          - "^{{ customer }}/.*"
          - "^{{ customer }}_.*"
      exclude:
        match_type: regexp
        metric_names:
          - "^avail.*"

  # https://github.com/open-telemetry/opentelemetry-collector/blob/v0.83.0/processor/batchprocessor/README.md
  # NOTE(review): `batch` is defined here but not referenced by the pipeline
  # in the `service` section of this file — confirm whether that is intended.
  batch:
    send_batch_max_size: 1000
    send_batch_size: 500  # number of metrics received before send
    timeout: 120s  # if 0 ignores batch_size and sends immediately

# Declares the health_check extension with no explicit settings; it is
# enabled via `service.extensions` further down in this file.
extensions:
health_check:

# Metric sinks for the custom deployment: a Prometheus endpoint and the
# collector's logging exporter.
exporters:
  logging:
    verbosity: detailed
    sampling_initial: 5
    sampling_thereafter: 200

  prometheus:
    # TODO: Check prom server endpoint with Lakshay
    endpoint: '0.0.0.0:8889'
    metric_expiration: 5m
    resource_to_telemetry_conversion:
      enabled: true
    # Every exported series carries the customer-specific basedn label.
    const_labels:
      basedn: "{{ customer }}.lightclient"

# Wires the components declared above into a single metrics pipeline.
service:
  extensions:
    - health_check
  pipelines:
    metrics:
      receivers:
        - prometheus
        - otlp
      processors:
        - filter/customers
      exporters:
        - prometheus
        - logging
3 changes: 2 additions & 1 deletion ansible/roles/datadog/templates/datadog.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ network_config:
systemProbe:
enabled: enabled

# TODO: Decouple network logic to allow extensibility to custom infra deploys
# Host tags attached by the Datadog agent. The `role` entry was listed twice;
# it is emitted once here.
tags:
  - basedn:lightclient.{{ network }}
  - hosted_on:digitalocean
  - component:lightclient
  - role:{{ group_names[0] }}
10 changes: 10 additions & 0 deletions terraform/deployments/infrastructure/sophon-otel/inventory.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[otel]
%{ for node in jsondecode(otel) ~}
${node.hostname} ansible_host=${node.ip}
%{ endfor ~}

[all:vars]
ansible_ssh_private_key_file=${customer}_key
ansible_user=root
customer=${customer}
is_custom_otel=true
37 changes: 37 additions & 0 deletions terraform/deployments/infrastructure/sophon-otel/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Looks up the DigitalOcean project named "Infrastructure"; the result is
# passed to the otel module as its `project` argument.
data "digitalocean_project" "project" {
name = "Infrastructure"
}

# Provisions the customer's OTel collector droplet via the shared `do` module.
# The customer tag is derived from var.customer (previously hard-coded to
# "sophon") so it stays consistent with the SSH key, SSM path and inventory
# names, which are all built from the same variable.
module "otel" {
  source          = "../../../modules/do"
  node_count      = 1
  image           = "ubuntu-22-04-x64"
  spec            = "c-8"
  node_type       = "otel"
  region          = "ams3"
  tags            = ["role:otel", "owner:devops", "customer:${var.customer}", "Infrastructure", "domain:metrics"]
  # NOTE(review): the module's `network` input is given the customer name
  # here — confirm that is what modules/do expects for custom deployments.
  network         = var.customer
  ssh_fingerprint = [digitalocean_ssh_key.key.fingerprint]
  project         = data.digitalocean_project.project
}

# Renders inventory.tpl and writes it to ansible/inventory/<customer>.ini.
# Uses the built-in templatefile() function instead of the `template_file`
# data source, which belongs to the deprecated hashicorp/template provider.
# The droplet list is still passed JSON-encoded because the template calls
# jsondecode() on it.
resource "local_file" "ansible_inventory" {
  content = templatefile("${path.module}/inventory.tpl", {
    otel     = jsonencode(module.otel.droplets)
    customer = var.customer
  })
  filename = "${path.module}/../../../../ansible/inventory/${var.customer}.ini"

  # Inventory is plain text; do not rely on the provider's permissive default.
  file_permission = "0644"
}

# Customer identifier; it is interpolated into the SSH key name, the SSM
# parameter path, the generated key/inventory file names and the inventory
# `customer` variable.
variable "customer" {
  description = "Customer name used to name and tag this deployment's resources and generated files."
  type        = string
  default     = "sophon"
}

# Exposes the droplets created by the otel module.
output "otel" {
  description = "Droplets created by the otel module for this customer."
  value       = module.otel.droplets
}
33 changes: 33 additions & 0 deletions terraform/deployments/infrastructure/sophon-otel/provider.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Terraform settings for the sophon-otel deployment: remote state in S3 and
# pinned providers. `tls` and `local` are declared explicitly because this
# deployment uses tls_private_key and local_file; without an entry here they
# are resolved implicitly and left unpinned.
terraform {
  required_version = ">= 1.6.0"

  backend "s3" {
    bucket = "do-avail-light-tfstate"
    key    = "customers/sophon"
    region = "eu-west-1"
  }

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.39.0"
    }
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.34.1"
    }
    local = {
      source  = "hashicorp/local"
      version = "~> 2.4"
    }
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
  }
}

# AWS provider pinned to eu-west-1; this deployment uses AWS for SSM
# parameters (the DigitalOcean token and the generated private key).
provider "aws" {
region = "eu-west-1"
}

# DigitalOcean provider; the API token is read from the `do_token` SSM
# parameter rather than being stored in the repository.
provider "digitalocean" {
token = data.aws_ssm_parameter.do_token.value
}

# Reads the SSM parameter named "do_token" with decryption enabled; its value
# is used as the DigitalOcean provider token.
data "aws_ssm_parameter" "do_token" {
name = "do_token"
with_decryption = true
}
21 changes: 21 additions & 0 deletions terraform/deployments/infrastructure/sophon-otel/ssh.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Generates a 4096-bit RSA key pair; the public half is registered with
# DigitalOcean and the private half is stored in SSM and written to disk
# by the other resources in this file.
resource "tls_private_key" "pk" {
algorithm = "RSA"
rsa_bits = 4096
}

# Stores the generated private key (PEM) as a SecureString parameter under
# /lightclient/<customer>/private_keypair.
resource "aws_ssm_parameter" "private_keypair" {
name = "/lightclient/${var.customer}/private_keypair"
description = "Private Keypair"
type = "SecureString"
value = tls_private_key.pk.private_key_pem
}

# Registers the generated public key with DigitalOcean as
# lc-ssh-key-<customer>; its fingerprint is passed to the otel module.
resource "digitalocean_ssh_key" "key" {
name = "lc-ssh-key-${var.customer}"
public_key = tls_private_key.pk.public_key_openssh
}

# Writes the generated private key to ansible/<customer>_key, the file name
# the generated inventory sets as ansible_ssh_private_key_file.
resource "local_file" "ssh_key" {
  content  = tls_private_key.pk.private_key_pem
  filename = "${path.module}/../../../../ansible/${var.customer}_key"

  # Owner read/write only. The provider's default mode is world-accessible,
  # which exposes the key and makes ssh refuse it as an unprotected private
  # key file.
  file_permission = "0600"
}