Skip to content

Commit

Permalink
feat(localenv): add trace collection (with Tempo) in local playground (
Browse files Browse the repository at this point in the history
…#2816)

* feat: add telemetry stack to localenv under command

* chore: remove tempo

* chore(localenv): update prometheus scrape interval

* chore: explicitly set otel collector endpoint

* chore: change prometheus scrape interval to 15s

* chore: update dashboard queries

* chore: add readme

* chore: set auto-refresh within grafana dashboard

* chore: add psql command

* feat(backend): add trace auto-instrumentation

* feat(localenv): add tempo to telemetry stack

* feat(localenv): add example panel of traces in the grafana dashboard

* chore: format for example dashboard

* chore: rearrange dashboard

* chore(backend): add instrumentation only if enabled
  • Loading branch information
mkurapov authored Jul 23, 2024
1 parent e65e032 commit ce66ab8
Show file tree
Hide file tree
Showing 12 changed files with 683 additions and 68 deletions.
3 changes: 2 additions & 1 deletion localenv/telemetry/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ The telemetry components include:

- **OpenTelemetry Collector**: Collects and processes telemetry data from `cloud-nine-backend` and `happy-life-backend` services.
- **Prometheus**: Scrapes metrics from the OpenTelemetry collector, and stores them.
- **Grafana**: Visualizes metrics from Prometheus.
- **Tempo**: Ingests traces from the OpenTelemetry collector, and stores them.
- **Grafana**: Visualizes metrics from Prometheus and traces from Tempo.

## Usage

Expand Down
17 changes: 16 additions & 1 deletion localenv/telemetry/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ services:
cloud-nine-backend:
environment:
ENABLE_TELEMETRY: true
ENABLE_TELEMETRY_TRACES: true
LIVENET: false
OPEN_TELEMETRY_COLLECTOR_URLS: http://otel-collector:4317
OPEN_TELEMETRY_TRACE_COLLECTOR_URLS: http://otel-collector:4317

happy-life-backend:
environment:
ENABLE_TELEMETRY: true
ENABLE_TELEMETRY_TRACES: true
LIVENET: false
OPEN_TELEMETRY_COLLECTOR_URLS: http://otel-collector:4317
OPEN_TELEMETRY_TRACE_COLLECTOR_URLS: http://otel-collector:4317

otel-collector:
hostname: otel-collector
image: otel/opentelemetry-collector:latest
command: "--config=/etc/otel-collector-config.yaml"
networks:
Expand All @@ -30,6 +33,17 @@ services:
ports:
- "9090:9090"

tempo:
image: grafana/tempo:latest
command: "-config.file=/etc/tempo.yaml"
networks:
- rafiki
ports:
- "3200:3200"
volumes:
- ../telemetry/tempo.yaml:/etc/tempo.yaml
- tempo-data:/var/tempo

grafana:
image: grafana/grafana:latest
networks:
Expand All @@ -45,3 +59,4 @@ services:

volumes:
grafana_storage:
tempo-data:
106 changes: 104 additions & 2 deletions localenv/telemetry/grafana/provisioning/dashboards/example.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"panels": [
{
Expand Down Expand Up @@ -119,6 +118,109 @@
"title": "Transaction Amount",
"type": "timeseries"
},
{
"datasource": {
"type": "tempo",
"uid": "P214B5B846CF3925F"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": ["sum"],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Duration"
}
]
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "tempo",
"uid": "P214B5B846CF3925F"
},
"filters": [
{
"id": "9bab4a0a",
"operator": "=",
"scope": "span"
},
{
"id": "service-name",
"operator": "=",
"scope": "resource",
"tag": "service.name",
"value": ["RAFIKI_NETWORK"],
"valueType": "string"
},
{
"id": "span-name",
"operator": "=",
"scope": "span",
"tag": "name",
"value": [],
"valueType": "string"
},
{
"id": "min-duration",
"operator": ">",
"tag": "duration",
"value": "100ms",
"valueType": "duration"
}
],
"limit": 20,
"queryType": "traceqlSearch",
"refId": "A",
"tableType": "traces"
}
],
"title": "Traces > 100ms",
"type": "table"
},
{
"datasource": {
"type": "prometheus",
Expand Down Expand Up @@ -339,6 +441,6 @@
"timezone": "browser",
"title": "Example Dashboard",
"uid": "fdr58stwkr6yof",
"version": 1,
"version": 2,
"weekStart": ""
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,8 @@ datasources:
isDefault: true
version: 1
editable: true
- name: Tempo
type: tempo
access: proxy
url: http://tempo:3200
editable: true
8 changes: 8 additions & 0 deletions localenv/telemetry/otel-collector-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,18 @@ exporters:
verbosity: detailed
prometheus:
endpoint: 0.0.0.0:8491
otlp:
endpoint: http://tempo:8492
tls:
insecure: true

service:
pipelines:
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus, debug]
traces:
receivers: [otlp]
processors: [batch]
exporters: [otlp, debug]
17 changes: 17 additions & 0 deletions localenv/telemetry/tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Tempo configuration for the local telemetry stack.
# Mounted into the `tempo` container at /etc/tempo.yaml by
# localenv/telemetry/docker-compose.yml.

# HTTP port of the Tempo server. docker-compose publishes 3200:3200 and the
# provisioned Grafana "Tempo" datasource points at http://tempo:3200.
server:
http_listen_port: 3200

# Trace ingestion: an OTLP receiver over gRPC listening on port 8492.
# The OpenTelemetry collector's `otlp` exporter is configured to send to
# tempo:8492, so these two values must stay in sync.
distributor:
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:8492

# Trace storage on the local filesystem under /var/tempo, which
# docker-compose backs with the `tempo-data` named volume.
storage:
trace:
backend: local
local:
path: /var/tempo/blocks
wal:
path: /var/tempo/wal
2 changes: 1 addition & 1 deletion packages/backend/Dockerfile.prod
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ COPY --from=builder /home/rafiki/packages/backend/dist ./packages/backend/dist
COPY --from=builder /home/rafiki/packages/token-introspection/dist ./packages/token-introspection/dist
COPY --from=builder /home/rafiki/packages/backend/knexfile.js ./packages/backend/knexfile.js

CMD ["node", "/home/rafiki/packages/backend/dist/index.js"]
CMD ["node", "-r", "/home/rafiki/packages/backend/dist/telemetry/index.js", "/home/rafiki/packages/backend/dist/index.js"]
12 changes: 10 additions & 2 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"copy-op-schemas": "cp ./node_modules/@interledger/open-payments/dist/openapi/specs/schemas.yaml ./src/openapi/specs/",
"prepack": "pnpm build",
"postinstall": "pnpm copy-op-schemas",
"dev": "ts-node-dev --inspect=0.0.0.0:9229 --respawn --transpile-only src/index.ts"
"dev": "ts-node-dev --inspect=0.0.0.0:9229 --respawn --transpile-only --require ./src/telemetry/index.ts src/index.ts"
},
"devDependencies": {
"@apollo/client": "^3.9.9",
Expand Down Expand Up @@ -64,8 +64,16 @@
"@koa/router": "^12.0.0",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-metrics-otlp-grpc": "^0.49.1",
"@opentelemetry/exporter-trace-otlp-grpc": "^0.52.1",
"@opentelemetry/instrumentation": "^0.52.1",
"@opentelemetry/instrumentation-graphql": "^0.42.0",
"@opentelemetry/instrumentation-http": "^0.52.1",
"@opentelemetry/instrumentation-pg": "^0.43.0",
"@opentelemetry/instrumentation-undici": "^0.4.0",
"@opentelemetry/resources": "^1.22.0",
"@opentelemetry/sdk-metrics": "^1.22.0",
"@opentelemetry/sdk-metrics": "^1.25.1",
"@opentelemetry/sdk-node": "^0.52.1",
"@opentelemetry/sdk-trace-node": "^1.25.1",
"ajv": "^8.12.0",
"axios": "1.6.8",
"base64url": "^3.0.1",
Expand Down
5 changes: 5 additions & 0 deletions packages/backend/src/config/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ const privateKeyFileValue = loadOrGenerateKey(privateKeyFileEnv)
export const Config = {
logLevel: envString('LOG_LEVEL', 'info'),
enableTelemetry: envBool('ENABLE_TELEMETRY', false),
enableTelemetryTraces: envBool('ENABLE_TELEMETRY_TRACES', false),
openTelemetryTraceCollectorUrls: envStringArray(
'OPEN_TELEMETRY_TRACE_COLLECTOR_URLS',
[]
),
livenet: envBool('LIVENET', false),
openTelemetryCollectors: envStringArray(
'OPEN_TELEMETRY_COLLECTOR_URLS',
Expand Down
80 changes: 80 additions & 0 deletions packages/backend/src/telemetry/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { Config } from '../config/app'
import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-grpc'
import { Resource } from '@opentelemetry/resources'
import {
MeterProvider,
PeriodicExportingMetricReader
} from '@opentelemetry/sdk-metrics'

import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc'
import { api } from '@opentelemetry/sdk-node'
import { PgInstrumentation } from '@opentelemetry/instrumentation-pg'
import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql'

import { HttpInstrumentation } from '@opentelemetry/instrumentation-http'
import {
BatchSpanProcessor,
NodeTracerProvider
} from '@opentelemetry/sdk-trace-node'
import { registerInstrumentations } from '@opentelemetry/instrumentation'
import { UndiciInstrumentation } from '@opentelemetry/instrumentation-undici'

// debug logger:
// diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG)

// Resource shared by the meter provider and the tracer provider below, so
// that metrics and spans emitted by this process carry the same attributes.
const SERVICE_NAME = 'RAFIKI_NETWORK'
const resourceAttributes = {
  'service.name': SERVICE_NAME,
  instance: Config.instanceName
}
const rafikiResource = new Resource(resourceAttributes)

// Metrics pipeline, set up only when ENABLE_TELEMETRY is on.
// One periodic reader (OTLP over gRPC) is created per configured collector
// URL, and the resulting provider is installed as the global meter provider.
if (Config.enableTelemetry) {
  const meterReaders = Config.openTelemetryCollectors.map(
    (collectorUrl) =>
      new PeriodicExportingMetricReader({
        exporter: new OTLPMetricExporter({ url: collectorUrl }),
        // Fall back to a 15s export interval when none is configured.
        exportIntervalMillis: Config.openTelemetryExportInterval ?? 15000
      })
  )

  api.metrics.setGlobalMeterProvider(
    new MeterProvider({
      resource: rafikiResource,
      readers: meterReaders
    })
  )
}

// Tracing pipeline, set up only when ENABLE_TELEMETRY_TRACES is on.
// Every configured trace collector URL gets its own batching span processor
// with an OTLP/gRPC exporter; the provider is then registered and the
// undici, http, pg and GraphQL instrumentations are enabled.
if (Config.enableTelemetryTraces) {
  const tracerProvider = new NodeTracerProvider({ resource: rafikiResource })

  Config.openTelemetryTraceCollectorUrls.forEach((collectorUrl) => {
    const spanProcessor = new BatchSpanProcessor(
      new OTLPTraceExporter({ url: collectorUrl })
    )
    tracerProvider.addSpanProcessor(spanProcessor)
  })

  // Register the provider before wiring up the instrumentations.
  tracerProvider.register()

  // NOTE(review): these options presumably cut down per-resolver span
  // noise — confirm against the GraphQL instrumentation docs.
  const graphqlOptions = {
    mergeItems: true,
    ignoreTrivialResolveSpans: true,
    ignoreResolveSpans: true
  }

  const instrumentations = [
    new UndiciInstrumentation(),
    new HttpInstrumentation(),
    new PgInstrumentation(),
    new GraphQLInstrumentation(graphqlOptions)
  ]

  registerInstrumentations({ instrumentations })
}
Loading

0 comments on commit ce66ab8

Please sign in to comment.