From 111dc96719c35034be46688722072ddc27929499 Mon Sep 17 00:00:00 2001 From: Micah Nagel Date: Wed, 25 Sep 2024 12:58:42 -0600 Subject: [PATCH] feat!: switch from promtail to vector (#724) ## Description BREAKING CHANGE: Noting this as a breaking change as Promtail is removed and replaced by Vector. If using overrides to setup additional log targets/endpoints this configuration will need to be updated to Vector's chart/config formats. Primary docs on rationale, decision, and impact of this switch are [here](https://github.com/defenseunicorns/uds-core/blob/vector-add/src/vector/README.md). ## Related Issue Fixes https://github.com/defenseunicorns/uds-core/issues/377 ## Type of change - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide](https://github.com/defenseunicorns/uds-template-capability/blob/main/CONTRIBUTING.md) followed --- .github/filters.yaml | 4 +- .vscode/settings.json | 4 +- CHANGELOG.md | 2 +- README.md | 2 +- compliance/oscal-component.yaml | 2 +- docs/application-baseline.md | 2 +- .../resource-configuration-and-ha.md | 14 ++- packages/standard/zarf.yaml | 6 +- renovate.json | 6 +- src/grafana/values/values.yaml | 3 + src/istio/oscal-component.yaml | 2 +- src/loki/chart/templates/uds-package.yaml | 6 +- .../controllers/exemptions/exemptions.spec.ts | 38 +++--- src/pepr/policies/exemptions/index.spec.ts | 2 +- src/pepr/zarf.yaml | 45 +++---- src/promtail/README.md | 1 - src/promtail/chart/templates/service.yaml | 18 --- .../chart/templates/uds-exemption.yaml | 24 ---- src/promtail/chart/templates/uds-package.yaml | 37 ------ src/promtail/chart/values.yaml | 0 src/promtail/common/zarf.yaml | 32 ----- src/promtail/tasks.yaml | 10 -- src/promtail/values/registry1-values.yaml | 10 -- src/promtail/values/unicorn-values.yaml | 10 -- 
src/promtail/values/upstream-values.yaml | 10 -- src/promtail/values/values.yaml | 116 ----------------- src/promtail/zarf.yaml | 51 -------- src/vector/README.md | 42 +++++++ src/{promtail => vector}/chart/.helmignore | 0 src/{promtail => vector}/chart/Chart.yaml | 4 +- .../chart/templates/_helpers.tpl | 20 +-- src/vector/chart/templates/uds-exemption.yaml | 21 ++++ src/vector/chart/templates/uds-package.yaml | 46 +++++++ src/vector/chart/values.yaml | 16 +++ src/vector/common/zarf.yaml | 38 ++++++ src/{promtail => vector}/oscal-component.yaml | 37 +++--- src/vector/tasks.yaml | 10 ++ src/vector/values/registry1-values.yaml | 3 + src/vector/values/unicorn-values.yaml | 3 + src/vector/values/upstream-values.yaml | 3 + src/vector/values/values.yaml | 118 ++++++++++++++++++ src/vector/zarf.yaml | 48 +++++++ 42 files changed, 444 insertions(+), 422 deletions(-) delete mode 100644 src/promtail/README.md delete mode 100644 src/promtail/chart/templates/service.yaml delete mode 100644 src/promtail/chart/templates/uds-exemption.yaml delete mode 100644 src/promtail/chart/templates/uds-package.yaml delete mode 100644 src/promtail/chart/values.yaml delete mode 100644 src/promtail/common/zarf.yaml delete mode 100644 src/promtail/tasks.yaml delete mode 100644 src/promtail/values/registry1-values.yaml delete mode 100644 src/promtail/values/unicorn-values.yaml delete mode 100644 src/promtail/values/upstream-values.yaml delete mode 100644 src/promtail/values/values.yaml delete mode 100644 src/promtail/zarf.yaml create mode 100644 src/vector/README.md rename src/{promtail => vector}/chart/.helmignore (100%) rename src/{promtail => vector}/chart/Chart.yaml (91%) rename src/{promtail => vector}/chart/templates/_helpers.tpl (71%) create mode 100644 src/vector/chart/templates/uds-exemption.yaml create mode 100644 src/vector/chart/templates/uds-package.yaml create mode 100644 src/vector/chart/values.yaml create mode 100644 src/vector/common/zarf.yaml rename src/{promtail => 
vector}/oscal-component.yaml (86%) create mode 100644 src/vector/tasks.yaml create mode 100644 src/vector/values/registry1-values.yaml create mode 100644 src/vector/values/unicorn-values.yaml create mode 100644 src/vector/values/upstream-values.yaml create mode 100644 src/vector/values/values.yaml create mode 100644 src/vector/zarf.yaml diff --git a/.github/filters.yaml b/.github/filters.yaml index 29fbf82ca..77ec1bba8 100644 --- a/.github/filters.yaml +++ b/.github/filters.yaml @@ -73,8 +73,8 @@ prometheus-stack: - "!**/*.gif" - "!**/*.svg" -promtail: - - "src/promtail/**" +vector: + - "src/vector/**" - "!**/*.md" - "!**/*.jpg" - "!**/*.png" diff --git a/.vscode/settings.json b/.vscode/settings.json index 11c8576a5..97ec6433e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -12,7 +12,6 @@ "https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/uds.schema.json": [ "uds-bundle.yaml" ], - // renovate: datasource=github-tags depName=defenseunicorns/uds-cli versioning=semver "https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.16.0/tasks.schema.json": [ "tasks.yaml", @@ -37,7 +36,6 @@ "MITM", "neuvector", "opensource", - "promtail", "Quarkus", "Quickstart", "seccomp", @@ -47,7 +45,7 @@ "cSpell.enabled": true, "[typescript]": { "editor.codeActionsOnSave": { - "source.organizeImports": "always" + "source.organizeImports": "always" } }, } diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e7d1a25..88a817fd9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -972,5 +972,5 @@ PRE RELEASE - CONTRIBUTING.md - DEVELOPMENT_MAINTENANCE.md - LICENSE -- READEME.md +- README.md - zarf.yaml diff --git a/README.md b/README.md index e28a0c371..9d1fc389d 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ UDS Core establishes a secure baseline for cloud-native systems and ships with c - [Neuvector](https://open-docs.neuvector.com/) - Container Security - [Pepr](https://pepr.dev) - UDS policy engine & operator - [Prometheus 
Stack](https://github.com/prometheus-operator/kube-prometheus) - Monitoring -- [Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/) - Log Aggregation +- [Vector](https://vector.dev/) - Log Aggregation - [Velero](https://velero.io/) - Backup & Restore - [UDS Runtime](https://github.com/defenseunicorns/uds-runtime) - Frontend Views & Insights diff --git a/compliance/oscal-component.yaml b/compliance/oscal-component.yaml index ecb88933e..4be69f019 100644 --- a/compliance/oscal-component.yaml +++ b/compliance/oscal-component.yaml @@ -19,7 +19,7 @@ component-definition: - href: 'file://./../src/loki/oscal-component.yaml' - href: 'file://./../src/neuvector/oscal-component.yaml' - href: 'file://./../src/prometheus-stack/oscal-component.yaml' - - href: 'file://./../src/promtail/oscal-component.yaml' + - href: 'file://./../src/vector/oscal-component.yaml' - href: 'file://./../src/velero/oscal-component.yaml' capabilities: diff --git a/docs/application-baseline.md b/docs/application-baseline.md index 5ebabf889..7eec1c580 100644 --- a/docs/application-baseline.md +++ b/docs/application-baseline.md @@ -18,7 +18,7 @@ For optimal deployment and operational efficiency, it is important to deliver a | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **Service Mesh** | **[Istio](https://istio.io/):** A powerful service mesh that provides traffic management, load balancing, security, and observability features. | | **Monitoring** | **[Metrics Server](https://kubernetes-sigs.github.io/metrics-server/):** Provides container resource utilization metrics API for Kubernetes clusters. 
Metrics server is an optional (non-default) component since most Kubernetes distros provide it by default.

**[Prometheus](https://prometheus.io/):** Scrapes Metrics Server API and application metrics and stores the data in a time-series database for insights into application health and performance.

**[Grafana](https://grafana.com/grafana/):** Provides visualization and alerting capabilities based on Prometheus's time-series database of metrics. | -| **Logging** | **[Promtail](https://grafana.com/docs/loki/latest/send-data/promtail/):** A companion agent that efficiently gathers and sends container logs to Loki, simplifying log monitoring, troubleshooting, and compliance auditing, enhancing the overall observability of the mission environment.

**[Loki](https://grafana.com/docs/loki/latest/):** A log aggregation system that allows users to store, search, and analyze logs across their applications. | +| **Logging** | **[Vector](https://vector.dev/):** A companion agent that efficiently gathers and sends container logs to Loki and other storage locations (S3, SIEM tools, etc), simplifying log monitoring, troubleshooting, and compliance auditing, enhancing the overall observability of the mission environment.

**[Loki](https://grafana.com/docs/loki/latest/):** A log aggregation system that allows users to store, search, and analyze logs across their applications. | | **Security and Compliance** | **[NeuVector](https://open-docs.neuvector.com/):** Offers container-native security, protecting applications against threats and vulnerabilities.

**[Pepr](https://pepr.dev/):** UDS policy engine and operator for enhanced security and compliance.| | **Identity and Access Management** | **[Keycloak](https://www.keycloak.org/):** A robust open-source Identity and Access Management solution, providing centralized authentication, authorization, and user management for enhanced security and control over access to mission-critical resources.| | **Backup and Restore** | **[Velero](https://velero.io/):** Provides backup and restore capabilities for Kubernetes clusters, ensuring data protection and disaster recovery.| diff --git a/docs/configuration/resource-configuration-and-ha.md b/docs/configuration/resource-configuration-and-ha.md index ca5012eca..31b0b7c5e 100644 --- a/docs/configuration/resource-configuration-and-ha.md +++ b/docs/configuration/resource-configuration-and-ha.md @@ -72,9 +72,19 @@ packages: ## Logging -### Promtail +### Vector -By default Promtail runs as a daemonset, automatically scaling across all nodes to ensure logs are captured from each host. Typically Promtail does not need any other modifications, but you can customize its resource configuration by overriding the `resources` helm value (using the component and chart name of `promtail`). +By default Vector runs as a daemonset, automatically scaling across all nodes to ensure logs are captured from each host. Typically Vector does not need any other modifications, but you can customize its resource configuration by overriding the `resources` helm value (using the component and chart name of `vector`). 
Vector recommends the below resourcing when running in production: + +```yaml +resources: + requests: + memory: "64Mi" + cpu: "500m" + limits: + memory: "1024Mi" + cpu: "6000m" +``` ### Loki diff --git a/packages/standard/zarf.yaml b/packages/standard/zarf.yaml index 10d402458..2071cb2ba 100644 --- a/packages/standard/zarf.yaml +++ b/packages/standard/zarf.yaml @@ -76,11 +76,11 @@ components: import: path: ../../src/prometheus-stack - # Promtail - - name: promtail + # Vector + - name: vector required: true import: - path: ../../src/promtail + path: ../../src/vector # Grafana - name: grafana diff --git a/renovate.json b/renovate.json index a306486a3..f4428a606 100644 --- a/renovate.json +++ b/renovate.json @@ -23,9 +23,9 @@ "commitMessageTopic": "istio" }, { - "matchFileNames": ["src/promtail/**"], - "groupName": "promtail", - "commitMessageTopic": "promtail" + "matchFileNames": ["src/vector/**"], + "groupName": "vector", + "commitMessageTopic": "vector" }, { "matchFileNames": ["src/velero/**"], diff --git a/src/grafana/values/values.yaml b/src/grafana/values/values.yaml index ccaa070f7..60eed85aa 100644 --- a/src/grafana/values/values.yaml +++ b/src/grafana/values/values.yaml @@ -22,6 +22,9 @@ grafana.ini: reporting_enabled: false check_for_updates: false check_for_plugin_updates: false + feedback_links_enabled: false + plugins: + public_key_retrieval_disabled: true auth: # Disable the login form to force users to use SSO disable_login_form: true diff --git a/src/istio/oscal-component.yaml b/src/istio/oscal-component.yaml index 5ceae0134..9a9a97f74 100644 --- a/src/istio/oscal-component.yaml +++ b/src/istio/oscal-component.yaml @@ -539,7 +539,7 @@ component-definition: # Expected values expected_istiod_port := 15012 expected_istiod_protocol := "TCP" - required_namespaces := {"authservice", "grafana", "keycloak", "loki", "metrics-server", "monitoring", "neuvector", "promtail", "velero"} + required_namespaces := {"authservice", "grafana", "keycloak", "loki", 
"metrics-server", "monitoring", "neuvector", "vector", "velero"} # Validate NetworkPolicy for Istiod in required namespaces validate { diff --git a/src/loki/chart/templates/uds-package.yaml b/src/loki/chart/templates/uds-package.yaml index a04557a51..b7a39eb5d 100644 --- a/src/loki/chart/templates/uds-package.yaml +++ b/src/loki/chart/templates/uds-package.yaml @@ -36,12 +36,12 @@ spec: - direction: Ingress selector: app.kubernetes.io/name: loki - remoteNamespace: promtail + remoteNamespace: vector remoteSelector: - app.kubernetes.io/name: promtail + app.kubernetes.io/name: vector ports: - 8080 - description: "Promtail Log Storage" + description: "Vector Log Storage" # Egress for S3 connections - direction: Egress diff --git a/src/pepr/operator/controllers/exemptions/exemptions.spec.ts b/src/pepr/operator/controllers/exemptions/exemptions.spec.ts index 270ae3002..b1bab8da1 100644 --- a/src/pepr/operator/controllers/exemptions/exemptions.spec.ts +++ b/src/pepr/operator/controllers/exemptions/exemptions.spec.ts @@ -20,13 +20,13 @@ const prometheusMatcher = { name: "^neuvector-prometheus-exporter-pod.*", kind: MatcherKind.Pod, }; -const promtailMatcher = { namespace: "promtail", name: "^promtail-.*", kind: MatcherKind.Pod }; +const vectorMatcher = { namespace: "vector", name: "^vector-.*", kind: MatcherKind.Pod }; const exemption1UID = "exemption-1-uid"; const exemption2UID = "exemption-2-uid"; const storedEnforcerMatcher = { ...enforcerMatcher, owner: exemption1UID }; const storedControllerMatcher = { ...controllerMatcher, owner: exemption1UID }; const storedPrometheusMatcher = { ...prometheusMatcher, owner: exemption1UID }; -const storedPromtailMatcher = { ...promtailMatcher, owner: exemption2UID }; +const storedVectorMatcher = { ...vectorMatcher, owner: exemption2UID }; const neuvectorMockExemption = { metadata: { uid: exemption1UID, @@ -90,7 +90,7 @@ describe("Test processExemptions() no duplicate matchers in same CR", () => { // remove RequireNonRootUser from 
enforcerMatcher // remove prometheusMatcher // add DisallowHostNamespaces to controllerMatcher - // add promtailMatcher with RequireNonRootUser + // add vectorMatcher with RequireNonRootUser const updatedNeuvectorExemption = { metadata: { uid: exemption1UID, @@ -110,7 +110,7 @@ describe("Test processExemptions() no duplicate matchers in same CR", () => { ], }, { - matcher: promtailMatcher, + matcher: vectorMatcher, policies: [Policy.RequireNonRootUser], }, ], @@ -120,7 +120,7 @@ describe("Test processExemptions() no duplicate matchers in same CR", () => { processExemptions(neuvectorMockExemption, WatchPhase.Added); processExemptions(updatedNeuvectorExemption, WatchPhase.Modified); expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([ - { ...storedPromtailMatcher, owner: exemption1UID }, + { ...storedVectorMatcher, owner: exemption1UID }, ]); expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([ storedEnforcerMatcher, @@ -360,14 +360,14 @@ describe("Test processExemptions(); phase DELETED", () => { }); it("Does not remove exemptions set by separate CR from the one being deleted", async () => { - const promtailMockExemption = { + const vectorMockExemption = { metadata: { uid: exemption2UID, }, spec: { exemptions: [ { - matcher: promtailMatcher, + matcher: vectorMatcher, policies: [ Policy.DisallowPrivileged, Policy.DropAllCapabilities, @@ -379,12 +379,12 @@ describe("Test processExemptions(); phase DELETED", () => { } as Exemption; processExemptions(neuvectorMockExemption, WatchPhase.Added); - processExemptions(promtailMockExemption, WatchPhase.Added); + processExemptions(vectorMockExemption, WatchPhase.Added); processExemptions(neuvectorMockExemption, WatchPhase.Deleted); - expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedPromtailMatcher]); - expect(ExemptionStore.getByPolicy(Policy.DropAllCapabilities)).toEqual([storedPromtailMatcher]); - 
expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([storedPromtailMatcher]); + expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedVectorMatcher]); + expect(ExemptionStore.getByPolicy(Policy.DropAllCapabilities)).toEqual([storedVectorMatcher]); + expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([storedVectorMatcher]); }); it("Does not delete duplicate exemptions if set by separate CRs", async () => { @@ -448,28 +448,28 @@ describe("Test processExemptions(); phase DELETED", () => { }, } as Exemption; - const promtailMockExemption = { + const vectorMockExemption = { metadata: { uid: exemption2UID, }, spec: { exemptions: [ { - matcher: promtailMatcher, + matcher: vectorMatcher, policies: [Policy.DisallowPrivileged], }, ], }, } as Exemption; - const promtailUpdatedMockExemption = { + const vectorUpdatedMockExemption = { metadata: { uid: exemption2UID, }, spec: { exemptions: [ { - matcher: promtailMatcher, + matcher: vectorMatcher, policies: [Policy.DisallowPrivileged, Policy.RequireNonRootUser], }, ], @@ -477,14 +477,14 @@ describe("Test processExemptions(); phase DELETED", () => { } as Exemption; processExemptions(neuvectorMockExemption, WatchPhase.Added); - processExemptions(promtailMockExemption, WatchPhase.Added); - processExemptions(promtailUpdatedMockExemption, WatchPhase.Modified); + processExemptions(vectorMockExemption, WatchPhase.Added); + processExemptions(vectorUpdatedMockExemption, WatchPhase.Modified); expect(ExemptionStore.getByPolicy(Policy.RequireNonRootUser)).toEqual([ storedEnforcerMatcher, - storedPromtailMatcher, + storedVectorMatcher, ]); expect(ExemptionStore.getByPolicy(Policy.DropAllCapabilities)).toEqual([storedEnforcerMatcher]); - expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedPromtailMatcher]); + expect(ExemptionStore.getByPolicy(Policy.DisallowPrivileged)).toEqual([storedVectorMatcher]); }); }); diff --git 
a/src/pepr/policies/exemptions/index.spec.ts b/src/pepr/policies/exemptions/index.spec.ts index 2ab36dd25..3f8faa429 100644 --- a/src/pepr/policies/exemptions/index.spec.ts +++ b/src/pepr/policies/exemptions/index.spec.ts @@ -34,7 +34,7 @@ describe("test registering exemptions", () => { const req = { Raw: { metadata: { - name: "promtail", + name: "vector", namespace: "monitoring", }, }, diff --git a/src/pepr/zarf.yaml b/src/pepr/zarf.yaml index 5dafad221..904045a19 100644 --- a/src/pepr/zarf.yaml +++ b/src/pepr/zarf.yaml @@ -52,31 +52,20 @@ components: actions: onDeploy: before: - - cmd: ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-api-token meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-module meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-tls meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate serviceaccount -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate clusterrolebinding pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate clusterrole pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate role -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate rolebinding -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core-watcher 
meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate mutatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true - - cmd: ./zarf tools kubectl annotate validatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true - mute: true + - mute: true + description: "Update helm ownership for Pepr resources if necessary during the upgrade" + cmd: | + ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-api-token meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-module meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate secret -n pepr-system pepr-uds-core-tls meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate serviceaccount -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate clusterrolebinding pepr-uds-core meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate clusterrole pepr-uds-core meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate role -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate rolebinding -n pepr-system pepr-uds-core-store meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate service -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate service -n pepr-system 
pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate deployment -n pepr-system pepr-uds-core-watcher meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate mutatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true + ./zarf tools kubectl annotate validatingwebhookconfiguration -n pepr-system pepr-uds-core meta.helm.sh/release-name=module --overwrite || true diff --git a/src/promtail/README.md b/src/promtail/README.md deleted file mode 100644 index 447959057..000000000 --- a/src/promtail/README.md +++ /dev/null @@ -1 +0,0 @@ -## Promtail diff --git a/src/promtail/chart/templates/service.yaml b/src/promtail/chart/templates/service.yaml deleted file mode 100644 index 23c6a4429..000000000 --- a/src/promtail/chart/templates/service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# Upstream chart can create this service but it is conditionally tied to the serviceMonitor which would cause errors in single package testing -# This would be resolved by https://github.com/grafana/helm-charts/pull/3083 when merged and released -apiVersion: v1 -kind: Service -metadata: - name: promtail-metrics - namespace: {{ .Release.Namespace }} - labels: - app.kubernetes.io/name: promtail -spec: - clusterIP: None - ports: - - name: http-metrics - port: 3101 - targetPort: http-metrics - protocol: TCP - selector: - app.kubernetes.io/name: promtail diff --git a/src/promtail/chart/templates/uds-exemption.yaml b/src/promtail/chart/templates/uds-exemption.yaml deleted file mode 100644 index 9b8bca9cf..000000000 --- a/src/promtail/chart/templates/uds-exemption.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: uds.dev/v1alpha1 -kind: Exemption -metadata: - name: promtail - namespace: uds-policy-exemptions -spec: - exemptions: - - policies: - - 
DisallowPrivileged - - RequireNonRootUser - - RestrictSELinuxType - - RestrictHostPathWrite - - RestrictVolumeTypes - matcher: - namespace: promtail - name: "^promtail-.*" - title: "promtail exemptions" - description: "Promtail mounts the following hostPaths: - - `/var/log/pods`: to tail pod logs - - `/var/lib/docker/containers`: to tail container logs - - `/run/promtail`: for Promtail's buffering and persistent state - Since logs can have sensitive information, it is better to exclude - Promtail from the policy than add the paths as allowable mounts - https://github.com/grafana/helm-charts/blob/main/charts/promtail/templates/daemonset.yaml#L120" diff --git a/src/promtail/chart/templates/uds-package.yaml b/src/promtail/chart/templates/uds-package.yaml deleted file mode 100644 index 98a46eca7..000000000 --- a/src/promtail/chart/templates/uds-package.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: uds.dev/v1alpha1 -kind: Package -metadata: - name: promtail - namespace: {{ .Release.Namespace }} -spec: - monitor: - - selector: - app.kubernetes.io/name: promtail - targetPort: 3101 - portName: http-metrics - description: Metrics - - network: - allow: - - direction: Ingress - selector: - app.kubernetes.io/name: promtail - remoteNamespace: monitoring - remoteSelector: - app.kubernetes.io/name: prometheus - port: 3101 - description: "Prometheus Metrics" - - - direction: Egress - selector: - app.kubernetes.io/name: promtail - remoteGenerated: KubeAPI - - - direction: Egress - selector: - app.kubernetes.io/name: promtail - remoteNamespace: loki - remoteSelector: - app.kubernetes.io/name: loki - port: 8080 - description: "Write Logs to Loki" diff --git a/src/promtail/chart/values.yaml b/src/promtail/chart/values.yaml deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/promtail/common/zarf.yaml b/src/promtail/common/zarf.yaml deleted file mode 100644 index be2384ee2..000000000 --- a/src/promtail/common/zarf.yaml +++ /dev/null @@ -1,32 +0,0 @@ -kind: 
ZarfPackageConfig -metadata: - name: uds-core-promtail-common - description: "UDS Core Promtail Common" - url: "https://grafana.com/docs/loki/latest/" - -components: - - name: promtail - required: true - charts: - - name: uds-promtail-config - namespace: promtail - version: 0.1.0 - localPath: ../chart - - name: promtail - url: https://grafana.github.io/helm-charts/ - version: 6.16.5 - namespace: promtail - gitPath: charts/promtail - valuesFiles: - - ../values/values.yaml - actions: - onDeploy: - after: - - description: Validate Promtail Package - maxTotalSeconds: 300 - wait: - cluster: - kind: packages.uds.dev - name: promtail - namespace: promtail - condition: "'{.status.phase}'=Ready" diff --git a/src/promtail/tasks.yaml b/src/promtail/tasks.yaml deleted file mode 100644 index 8117f590a..000000000 --- a/src/promtail/tasks.yaml +++ /dev/null @@ -1,10 +0,0 @@ -tasks: - - name: validate - actions: - - description: Validate promtail - wait: - cluster: - kind: Pod - name: app.kubernetes.io/instance=promtail - namespace: promtail - condition: Ready diff --git a/src/promtail/values/registry1-values.yaml b/src/promtail/values/registry1-values.yaml deleted file mode 100644 index 6dec37593..000000000 --- a/src/promtail/values/registry1-values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -image: - registry: registry1.dso.mil - repository: ironbank/opensource/grafana/promtail - tag: v3.1.1 -sidecar: - configReloader: - image: - registry: registry1.dso.mil - repository: ironbank/opensource/jimmidyson/configmap-reload - tag: v0.13.1 diff --git a/src/promtail/values/unicorn-values.yaml b/src/promtail/values/unicorn-values.yaml deleted file mode 100644 index c2248c2a6..000000000 --- a/src/promtail/values/unicorn-values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -image: - registry: cgr.dev - repository: du-uds-defenseunicorns/promtail - tag: 3.1.1 -sidecar: - configReloader: - image: - registry: cgr.dev - repository: du-uds-defenseunicorns/configmap-reload-fips - tag: 0.13.1 diff --git 
a/src/promtail/values/upstream-values.yaml b/src/promtail/values/upstream-values.yaml deleted file mode 100644 index 9c9dc6f40..000000000 --- a/src/promtail/values/upstream-values.yaml +++ /dev/null @@ -1,10 +0,0 @@ -image: - registry: docker.io - repository: grafana/promtail - tag: 3.1.1 -sidecar: - configReloader: - image: - registry: ghcr.io - repository: jimmidyson/configmap-reload - tag: v0.13.1 diff --git a/src/promtail/values/values.yaml b/src/promtail/values/values.yaml deleted file mode 100644 index d7bb9af71..000000000 --- a/src/promtail/values/values.yaml +++ /dev/null @@ -1,116 +0,0 @@ -config: - clients: - - url: 'http://loki-gateway.loki.svc.cluster.local:80/loki/api/v1/push' - - snippets: - scrapeConfigs: | - # Upstream Defaults https://github.com/grafana/helm-charts/blob/main/charts/promtail/values.yaml - # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference - - job_name: kubernetes-pods - pipeline_stages: - {{- toYaml .Values.config.snippets.pipelineStages | nindent 4 }} - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: - - __meta_kubernetes_pod_controller_name - regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})? 
- action: replace - target_label: __tmp_controller_name - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_name - - __meta_kubernetes_pod_label_app - - __tmp_controller_name - - __meta_kubernetes_pod_name - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: app - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_instance - - __meta_kubernetes_pod_label_instance - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: instance - - source_labels: - - __meta_kubernetes_pod_label_app_kubernetes_io_component - - __meta_kubernetes_pod_label_component - regex: ^;*([^;]+)(;.*)?$ - action: replace - target_label: component - {{- if .Values.config.snippets.addScrapeJobLabel }} - - replacement: kubernetes-pods - target_label: scrape_job - {{- end }} - {{- toYaml .Values.config.snippets.common | nindent 4 }} - {{- with .Values.config.snippets.extraRelabelConfigs }} - {{- toYaml . | nindent 4 }} - {{- end }} - # UDS CORE Defaults - - job_name: systemd-messages - static_configs: - - targets: [localhost] - labels: - job: varlogs - host: "${NODE_HOSTNAME}" - __path__: /var/log/* - relabel_configs: - - source_labels: - - __journal_systemd_unit - target_label: systemd_unit - - source_labels: - - __journal_hostname - target_label: nodename - - source_labels: - - __journal_syslog_identifier - target_label: syslog_identifier - - job_name: kubernetes-logs - static_configs: - - targets: [localhost] - labels: - job: kubernetes-logs - host: "${NODE_HOSTNAME}" - __path__: /var/log/kubernetes/**/*.log - -containerSecurityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsUser: 0 - seLinuxOptions: - type: spc_t -extraArgs: - - '-config.expand-env=true' - -extraEnv: - - name: NODE_HOSTNAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - -extraVolumes: - - hostPath: - path: /var/log - name: varlog - - hostPath: - path: /etc - name: machine-id - 
-extraVolumeMounts: - - mountPath: /var/log - name: varlog - readOnly: true - - mountPath: /etc/machine-id - name: machine-id - readOnly: true - subPath: machine-id - -resources: - limits: - cpu: 500m - memory: 750Mi - requests: - cpu: 100m - memory: 256Mi diff --git a/src/promtail/zarf.yaml b/src/promtail/zarf.yaml deleted file mode 100644 index 69354c754..000000000 --- a/src/promtail/zarf.yaml +++ /dev/null @@ -1,51 +0,0 @@ -kind: ZarfPackageConfig -metadata: - name: uds-core-promtail - description: "UDS Core Promtail" - url: "https://grafana.com/docs/loki/latest/" - -components: - - name: promtail - required: true - description: "Deploy Promtail" - only: - flavor: upstream - import: - path: common - charts: - - name: promtail - valuesFiles: - - values/upstream-values.yaml - images: - - ghcr.io/jimmidyson/configmap-reload:v0.13.1 - - docker.io/grafana/promtail:3.1.1 - - - name: promtail - required: true - description: "Deploy Promtail" - only: - flavor: registry1 - import: - path: common - charts: - - name: promtail - valuesFiles: - - values/registry1-values.yaml - images: - - registry1.dso.mil/ironbank/opensource/jimmidyson/configmap-reload:v0.13.1 - - registry1.dso.mil/ironbank/opensource/grafana/promtail:v3.1.1 - - - name: promtail - required: true - description: "Deploy Promtail" - only: - flavor: unicorn - import: - path: common - charts: - - name: promtail - valuesFiles: - - values/unicorn-values.yaml - images: - - cgr.dev/du-uds-defenseunicorns/configmap-reload-fips:0.13.1 - - cgr.dev/du-uds-defenseunicorns/promtail:3.1.1 diff --git a/src/vector/README.md b/src/vector/README.md new file mode 100644 index 000000000..d09241ca4 --- /dev/null +++ b/src/vector/README.md @@ -0,0 +1,42 @@ +# Vector + +Vector is a lightweight tool for building observability pipelines, built and maintained primarily by Datadog. Within UDS Core it is primarily used for log collection and shipping to destinations (like Loki and S3). 
+ +## Switching from Promtail to Vector + +Within UDS Core we have made the decision to switch from Promtail (historically the log collector/shipper of choice) to Vector. The sections below cover the primary motivating factors and impacts of this choice. + +### Motivations + +Promtail has historically been the tool of choice for log collection/shipping when using Loki. It provides a very lightweight layer to scrape logs from pods and hosts, label them with additional metadata, and ship them to Loki. + +One of the main issues that has arisen with Promtail is its limited output/export options. Promtail only supports sending logs to one or more Loki instances. A common requirement in production environments is to ship logs to a secondary destination for collection/analysis by security teams and SIEM tools. Promtail is currently listed as [feature complete](https://grafana.com/docs/loki/latest/send-data/promtail/) so there is no expectation that additional export functionality would be added. + +### Goals and Options + +In choosing an alternative to Promtail we have a few primary objectives: +- Chosen tool must be capable of gathering host and pod logs: This has been our primary usage of Promtail in the past - gathering pod logs and host logs (to include k8s audit logs, control plane logs, etc.). +- Provide a tool that has numerous export options to cover specific needs for environments: Current known requirements include Loki, S3, and SIEM tools like Elastic and Splunk. Ideally the tool of choice supports all of these and more, allowing for expansion as new environments require it. +- Choose a tool that does not require major changes in our logging stack, but is flexible for future adjustments to the stack: As we do have active users of our product we want to be careful in switching tools, so ideally we would like a tool that is a "drop-in" replacement. However, we don't want to rule out future changes to other pieces of the stack (i.e.
Loki) so choosing a tool that doesn't lock us into Loki is important. +- Focus on the log collection/shipping problem: While there are a number of tools that offer far more than just logging pipelines (metrics, traces, etc.), we don't currently see a need to focus on these tools. These features are seen as a nice-to-have, but are not being evaluated as the focus here. + +Three tools in the space of log collection were considered: +- [Vector](https://vector.dev/): Open source and maintained by Datadog, Vector provides input integrations with Kubernetes logs, arbitrary files, and [other sources](https://vector.dev/docs/reference/configuration/sources/). It has the necessary export integrations with Loki, S3, Elastic, Splunk and a [number of other sinks](https://vector.dev/docs/reference/configuration/sinks/). Vector is a newer tool that has not yet reached a 1.0 release, but has risen in popularity due to its performance improvements over other tools. +- [FluentBit](https://fluentbit.io/): FluentBit was historically used in Big Bang and supports file-based inputs as well as [other inputs](https://docs.fluentbit.io/manual/pipeline/inputs). It also supports the necessary output integrations (Loki, S3, Elastic, Splunk and [others](https://docs.fluentbit.io/manual/pipeline/outputs)). FluentBit is a CNCF graduated project and is relatively mature. FluentBit fell out of favor with Big Bang due to some of the complexities around managing it at scale, specifically with its buffering. +- [Grafana Alloy](https://grafana.com/docs/alloy/latest/): Alloy is a distribution of the OpenTelemetry Collector, open source and maintained by Grafana Labs. It supports the necessary [inputs and outputs](https://grafana.com/docs/alloy/latest/reference/components/) (local file/k8s logs, Loki and S3). As a distribution of OTel it supports vendor-agnostic output formats and can be integrated with numerous other tools through the OTel ecosystem.
While Alloy itself is relatively new, it is built on the previous codebase of Grafana Agent and the existing OTel framework. Notably it does not have any direct integrations with Splunk or Elastic, and its S3 integration is noted as experimental. + +### Decision and Impact + +Vector has been chosen as our replacement for Promtail. Primary motivations include: +- Vector has an extensive "component" catalog for inputs and outputs, with complete coverage of all currently desired export locations (and all are noted as "stable" integrations). +- Vector's configuration is simple and works well in Helm and with UDS Helm overrides (easy to add additional export locations via bundle overrides for example). +- Despite being a newer project, Vector's community is very active - with the most active contributors and GitHub stars compared to the other two tools. +- Vector is [significantly more performant](https://github.com/vectordotdev/vector?tab=readme-ov-file#performance) than other tooling in the space on most categories of metrics. + +As with any tooling decision in core, this can always be reevaluated in the future as different tools or factors affect how we look at our logging stack. + +### Upgrade Considerations + +During the upgrade there may be some duplication/overlap of log lines shipped to Loki due to the transition from Promtail's "position" file to Vector's "checkpoint" file (both used for tracking the last log line scraped/shipped). Grafana provides a built-in feature to de-duplicate log entries when querying Loki, but this does not consistently work with all log lines due to the approach used by Grafana for de-duplication. + +To ensure easy querying of logs across the upgrade, all logs shipped by Vector also have a `collector` label (with the value of `vector`). This can be used to filter down any logs to those collected by either Vector or Promtail (using the `=` and `!=` operators).
In general you can use these filters along with tracking your upgrade timing to properly ignore duplicate logs for the short upgrade period. diff --git a/src/promtail/chart/.helmignore b/src/vector/chart/.helmignore similarity index 100% rename from src/promtail/chart/.helmignore rename to src/vector/chart/.helmignore diff --git a/src/promtail/chart/Chart.yaml b/src/vector/chart/Chart.yaml similarity index 91% rename from src/promtail/chart/Chart.yaml rename to src/vector/chart/Chart.yaml index 84403fdd5..6b5ca4898 100644 --- a/src/promtail/chart/Chart.yaml +++ b/src/vector/chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 -name: uds-promtail-config -description: Promtail configuration for UDS +name: uds-vector-config +description: Vector configuration for UDS # A chart can be either an 'application' or a 'library' chart. # diff --git a/src/promtail/chart/templates/_helpers.tpl b/src/vector/chart/templates/_helpers.tpl similarity index 71% rename from src/promtail/chart/templates/_helpers.tpl rename to src/vector/chart/templates/_helpers.tpl index e2736937a..7290ba589 100644 --- a/src/promtail/chart/templates/_helpers.tpl +++ b/src/vector/chart/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Expand the name of the chart. */}} -{{- define "uds-promtail-config.name" -}} +{{- define "uds-vector-config.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} @@ -10,7 +10,7 @@ Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). If release name contains chart name it will be used as a full name. */}} -{{- define "uds-promtail-config.fullname" -}} +{{- define "uds-vector-config.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name. 
{{/* Create chart name and version as used by the chart label. */}} -{{- define "uds-promtail-config.chart" -}} +{{- define "uds-vector-config.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "uds-promtail-config.labels" -}} -helm.sh/chart: {{ include "uds-promtail-config.chart" . }} -{{ include "uds-promtail-config.selectorLabels" . }} +{{- define "uds-vector-config.labels" -}} +helm.sh/chart: {{ include "uds-vector-config.chart" . }} +{{ include "uds-vector-config.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} @@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Selector labels */}} -{{- define "uds-promtail-config.selectorLabels" -}} -app.kubernetes.io/name: {{ include "uds-promtail-config.name" . }} +{{- define "uds-vector-config.selectorLabels" -}} +app.kubernetes.io/name: {{ include "uds-vector-config.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "uds-promtail-config.serviceAccountName" -}} +{{- define "uds-vector-config.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} -{{- default (include "uds-promtail-config.fullname" .) .Values.serviceAccount.name }} +{{- default (include "uds-vector-config.fullname" .) 
.Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} diff --git a/src/vector/chart/templates/uds-exemption.yaml b/src/vector/chart/templates/uds-exemption.yaml new file mode 100644 index 000000000..0c6032102 --- /dev/null +++ b/src/vector/chart/templates/uds-exemption.yaml @@ -0,0 +1,21 @@ +apiVersion: uds.dev/v1alpha1 +kind: Exemption +metadata: + name: vector + namespace: uds-policy-exemptions +spec: + exemptions: + - policies: + - RequireNonRootUser + - RestrictSELinuxType + - RestrictHostPathWrite + - RestrictVolumeTypes + matcher: + namespace: vector + name: "^vector-.*" + title: "vector exemptions" + description: "Vector mounts the following hostPaths: + - `/var/log`: to tail logs + - `/var/lib/vector`: for Vector's buffering and persistent state + Since logs can have sensitive information, it is better to exclude + Vector from the policy than add the paths as allowable mounts" diff --git a/src/vector/chart/templates/uds-package.yaml b/src/vector/chart/templates/uds-package.yaml new file mode 100644 index 000000000..b6bf5bbc1 --- /dev/null +++ b/src/vector/chart/templates/uds-package.yaml @@ -0,0 +1,46 @@ +apiVersion: uds.dev/v1alpha1 +kind: Package +metadata: + name: vector + namespace: {{ .Release.Namespace }} +spec: + network: + allow: + - direction: Ingress + selector: + app.kubernetes.io/name: vector + remoteNamespace: monitoring + remoteSelector: + app.kubernetes.io/name: prometheus + port: 9090 + description: "Prometheus Metrics" + + - direction: Egress + selector: + app.kubernetes.io/name: vector + remoteGenerated: KubeAPI + + - direction: Egress + selector: + app.kubernetes.io/name: vector + remoteNamespace: loki + remoteSelector: + app.kubernetes.io/name: loki + port: 8080 + description: "Write Logs to Loki" + + # Custom rules for additional networking access + {{- range .Values.additionalNetworkAllow }} + - direction: {{ .direction }} + selector: + {{ .selector | toYaml | nindent 10 }} + {{- if 
not .remoteGenerated }} + remoteNamespace: {{ .remoteNamespace }} + remoteSelector: + {{ .remoteSelector | toYaml | nindent 10 }} + port: {{ .port }} + {{- else }} + remoteGenerated: {{ .remoteGenerated }} + {{- end }} + description: {{ .description }} + {{- end }} diff --git a/src/vector/chart/values.yaml b/src/vector/chart/values.yaml new file mode 100644 index 000000000..f2d4d867f --- /dev/null +++ b/src/vector/chart/values.yaml @@ -0,0 +1,16 @@ +additionalNetworkAllow: [] +# Examples: +# - direction: Egress +# selector: +# app.kubernetes.io/name: vector +# remoteNamespace: elastic +# remoteSelector: +# app.kubernetes.io/name: elastic +# port: 9090 +# description: "Elastic Storage" +# - direction: Egress +# selector: +# app.kubernetes.io/name: vector +# remoteGenerated: Anywhere +# port: 80 +# description: "S3 Storage" diff --git a/src/vector/common/zarf.yaml b/src/vector/common/zarf.yaml new file mode 100644 index 000000000..b020db0e8 --- /dev/null +++ b/src/vector/common/zarf.yaml @@ -0,0 +1,38 @@ +kind: ZarfPackageConfig +metadata: + name: uds-core-vector-common + description: "UDS Core Vector Common" + url: "https://vector.dev/" + +components: + - name: vector + required: true + charts: + - name: uds-vector-config + namespace: vector + version: 0.1.0 + localPath: ../chart + - name: vector + url: https://helm.vector.dev + version: 0.36.1 + namespace: vector + gitPath: charts/vector + valuesFiles: + - ../values/values.yaml + actions: + onDeploy: + before: + - description: Remove Promtail Components if necessary + mute: true + cmd: | + ./zarf package remove core --components promtail --confirm || true # Ensure this doesn't error on installs and upgrades when Promtail no longer exists + ./zarf tools kubectl delete ns promtail || true # Ensure this doesn't error on installs and upgrades when Promtail no longer exists + after: + - description: Validate Vector Package + maxTotalSeconds: 300 + wait: + cluster: + kind: Packages + name: vector + namespace: vector + 
condition: "'{.status.phase}'=Ready" diff --git a/src/promtail/oscal-component.yaml b/src/vector/oscal-component.yaml similarity index 86% rename from src/promtail/oscal-component.yaml rename to src/vector/oscal-component.yaml index 94635da4e..fef87cc00 100644 --- a/src/promtail/oscal-component.yaml +++ b/src/vector/oscal-component.yaml @@ -1,7 +1,7 @@ component-definition: uuid: ff959bdb-7be9-49b3-9dc2-c41b34e7017d metadata: - title: Promtail + title: Vector last-modified: "2024-01-31T16:44:35Z" version: "20240132" oscal-version: 1.1.2 @@ -15,7 +15,7 @@ component-definition: components: - uuid: 3ca1e9a3-a566-48d1-93af-200abd1245e3 type: software - title: Promtail + title: Vector description: | Log collector purpose: Collects logs from the cluster @@ -26,7 +26,7 @@ component-definition: control-implementations: - uuid: d2afb4c4-2cd8-5305-a6cc-d1bc7b388d0c source: https://raw.githubusercontent.com/GSA/fedramp-automation/93ca0e20ff5e54fc04140613476fba80f08e3c7d/dist/content/rev5/baselines/json/FedRAMP_rev5_HIGH-baseline-resolved-profile_catalog.json - description: Controls implemented by Promtail for inheritance by applications + description: Controls implemented by Vector for inheritance by applications implemented-requirements: - uuid: 954ba9c8-452c-4503-a43f-c880a01b828d control-id: ac-6.9 @@ -36,7 +36,7 @@ component-definition: Auditing the use of privileged functions is one way to detect such misuse, and in doing so, help mitigate the risk from insider threats and the advanced persistent threat (APT). # Control Implementation - Promtail can be configured to collect all logs from Kubernetes and underlying operating systems, allowing the aggregation of privileged function calls. + Vector can be configured to collect all logs from Kubernetes and underlying operating systems, allowing the aggregation of privileged function calls. remarks: This control is fully implemented by this tool. 
links: - href: "#98b97ec9-a9ce-4444-83d8-71066270a424" @@ -58,7 +58,7 @@ component-definition: # Control Implementation Logging daemons are present on each node that BigBang is installed on. Out of the box, the following events are captured: - * all containers emitting to STDOUT or STDERR (captured by container runtime translating container logs to /var/log/containers). + * all containers emitting to STDOUT or STDERR (captured by container runtime creating containers logs under /var/log/pods). * all kubernetes api server requests. * all events emitted by the kubelet. remarks: This control is fully implemented by this tool. @@ -78,9 +78,7 @@ component-definition: Event outcomes can include indicators of event success or failure and event-specific results (e.g., the security state of the information system after the event occurred). # Control Implementation - Logs are captured by promtail from the node. The node logs will contain the necessary log data from all pods/applications inside the selected nodes. - Validating `logfmt` as the config.logFormat would be the goal. This is currently a secret mounted to /etc/promtail/promtail.yaml in the promtail container. We will ensure the promtail.yaml file is at a minimum the target config. - https://grafana.com/docs/loki/latest/send-data/promtail/stages/logfmt/ + Logs are captured by vector from the node. The node logs will contain the necessary log data from all pods/applications inside the selected nodes as well as Kubernetes audit logs. remarks: This control is fully implemented by this tool. links: - href: "#98b97ec9-a9ce-4444-83d8-71066270a424" @@ -105,8 +103,6 @@ component-definition: * time of the event (UTC). * source of event (pod, namespace, container id). Applications are responsible for providing all other information. - Validating `logfmt` as the config.logFormat would be the goal. This is currently a secret mounted to /etc/promtail/promtail.yaml in the promtail container. 
We will ensure the promtail.yaml file is at a minimum the target config. - https://grafana.com/docs/loki/latest/send-data/promtail/stages/logfmt/ remarks: This control is fully implemented by this tool. links: - href: "#98b97ec9-a9ce-4444-83d8-71066270a424" @@ -122,13 +118,9 @@ component-definition: back-matter: resources: - uuid: D552C935-E40C-4A03-B5CC-4605EBD95B6D - title: Promtail + title: Vector rlinks: - - href: https://grafana.com/docs/loki/latest/clients/promtail/ - - uuid: 211C474B-E11A-4DD2-8075-50CDAC507CDC - title: Big Bang Promtail package - rlinks: - - href: https://repo1.dso.mil/platform-one/big-bang/apps/sandbox/promtail + - href: https://vector.dev/ - uuid: 98b97ec9-a9ce-4444-83d8-71066270a424 title: Lula Validation rlinks: @@ -145,7 +137,7 @@ component-definition: Group: apps Version: v1 Resource: daemonsets - Namespaces: [promtail] + Namespaces: [vector] rego: | package validate @@ -177,7 +169,7 @@ component-definition: Group: Version: v1 Resource: pods - Namespaces: [promtail] + Namespaces: [vector] rego: | package validate @@ -213,7 +205,7 @@ component-definition: Group: Version: v1 Resource: pods - Namespaces: [promtail] + Namespaces: [vector] rego: | package validate @@ -250,7 +242,7 @@ component-definition: Group: Version: v1 Resource: pods - Namespaces: [promtail] + Namespaces: [vector] rego: | package validate @@ -261,8 +253,9 @@ component-definition: containers := pod.spec.containers some container in containers - container.name == "promtail" + container.name == "vector" some i - container.args[i] == "-config.file=/etc/promtail/promtail.yaml" + container.args[i] == "--config-dir" # the directory value must be the argument right after this flag + container.args[i + 1] == "/etc/vector/" } } diff --git a/src/vector/tasks.yaml b/src/vector/tasks.yaml new file mode 100644 index 000000000..69dfbf4ff --- /dev/null +++ b/src/vector/tasks.yaml @@ -0,0 +1,10 @@ +tasks: + - name: validate + actions: + - description: Validate vector + wait: + cluster: + kind: Pod + name: app.kubernetes.io/name=vector + namespace:
vector + condition: Ready diff --git a/src/vector/values/registry1-values.yaml b/src/vector/values/registry1-values.yaml new file mode 100644 index 000000000..85509e7b4 --- /dev/null +++ b/src/vector/values/registry1-values.yaml @@ -0,0 +1,3 @@ +image: + repository: registry1.dso.mil/ironbank/opensource/timberio/vector + tag: 0.41.1 diff --git a/src/vector/values/unicorn-values.yaml b/src/vector/values/unicorn-values.yaml new file mode 100644 index 000000000..d90700602 --- /dev/null +++ b/src/vector/values/unicorn-values.yaml @@ -0,0 +1,3 @@ +image: + repository: cgr.dev/du-uds-defenseunicorns/vector + tag: 0.41.1 diff --git a/src/vector/values/upstream-values.yaml b/src/vector/values/upstream-values.yaml new file mode 100644 index 000000000..5180f3c7c --- /dev/null +++ b/src/vector/values/upstream-values.yaml @@ -0,0 +1,3 @@ +image: + repository: timberio/vector + tag: 0.41.1-distroless-static diff --git a/src/vector/values/values.yaml b/src/vector/values/values.yaml new file mode 100644 index 000000000..7bbe3ee60 --- /dev/null +++ b/src/vector/values/values.yaml @@ -0,0 +1,118 @@ +# Run as an agent daemonset +role: "Agent" + +customConfig: + data_dir: /var/lib/vector + # Ensure e2e delivery of events + acknowledgements: + enabled: true + sources: + pod_logs: + type: "kubernetes_logs" + oldest_first: true + node_logs: + type: "file" + include: ["/var/log/*", "/var/log/kubernetes/**/*.log"] + oldest_first: true + internal_metrics: + type: internal_metrics + + transforms: + pod_logs_labelled: + type: remap + inputs: ["pod_logs"] + source: | + if exists(.kubernetes.pod_labels."app.kubernetes.io/name") { + .app = .kubernetes.pod_labels."app.kubernetes.io/name" + } else if exists(.kubernetes.pod_labels.app) { + .app = .kubernetes.pod_labels.app + } else if exists(.kubernetes.pod_owner) { + .app = replace!(.kubernetes.pod_owner, r'^([^/]+/)', "") + } else { + .app = .kubernetes.pod_name + } + + if exists(.kubernetes.pod_labels."app.kubernetes.io/component") { + 
.component = .kubernetes.pod_labels."app.kubernetes.io/component" + } else if exists(.kubernetes.pod_labels.component) { # fall back to the plain "component" pod label when it is present + .component = .kubernetes.pod_labels.component + } else { + .component = "" + } + + node_logs_labelled: + type: remap + inputs: ["node_logs"] + source: | + .node_name = "${NODE_HOSTNAME}" + if contains(string!(.file), "/var/log/kubernetes/") { + .job = "kubernetes-logs" + } else { + .job = "varlogs" + } + + sinks: + loki_pod: + type: "loki" + inputs: ["pod_logs_labelled"] + endpoint: "http://loki-gateway.loki.svc.cluster.local:80" + path: "/loki/api/v1/push" + encoding: + codec: "raw_message" + labels: + namespace: '{{`{{ kubernetes.pod_namespace }}`}}' + app: '{{`{{ app }}`}}' + job: '{{`{{ kubernetes.pod_namespace }}`}}/{{`{{ app }}`}}' + container: '{{`{{ kubernetes.container_name }}`}}' + component: '{{`{{ component }}`}}' + host: '{{`{{ kubernetes.pod_node_name }}`}}' + filename: '{{`{{ file }}`}}' + collector: "vector" + buffer: + type: disk + max_size: 1073741824 # 1GiB + loki_host: + type: "loki" + inputs: ["node_logs_labelled"] + endpoint: "http://loki-gateway.loki.svc.cluster.local:80" + path: "/loki/api/v1/push" + encoding: + codec: "raw_message" + labels: + job: '{{`{{ job }}`}}' + host: '{{`{{ node_name }}`}}' + filename: '{{`{{ file }}`}}' + collector: "vector" + buffer: + type: disk + max_size: 1073741824 # 1GiB + prom_exporter: + type: prometheus_exporter + inputs: [internal_metrics] + address: 0.0.0.0:9090 + +persistence: + enabled: true + hostPath: + enabled: true + path: "/var/lib/vector" + +podMonitor: + enabled: true +service: + ports: + - name: prom-exporter + port: 9090 + protocol: TCP + +securityContext: + readOnlyRootFilesystem: true + runAsUser: 0 + seLinuxOptions: + type: spc_t + +env: + - name: NODE_HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName diff --git a/src/vector/zarf.yaml b/src/vector/zarf.yaml new file mode 100644 index 000000000..738476d7d --- /dev/null +++ b/src/vector/zarf.yaml @@ -0,0 +1,48
@@ +kind: ZarfPackageConfig +metadata: + name: uds-core-vector + description: "UDS Core Vector" + url: "https://vector.dev/" + +components: + - name: vector + required: true + description: "Deploy Vector" + only: + flavor: upstream + import: + path: common + charts: + - name: vector + valuesFiles: + - values/upstream-values.yaml + images: + - timberio/vector:0.41.1-distroless-static + + - name: vector + required: true + description: "Deploy Vector" + only: + flavor: registry1 + import: + path: common + charts: + - name: vector + valuesFiles: + - values/registry1-values.yaml + images: + - registry1.dso.mil/ironbank/opensource/timberio/vector:0.41.1 + + - name: vector + required: true + description: "Deploy Vector" + only: + flavor: unicorn + import: + path: common + charts: + - name: vector + valuesFiles: + - values/unicorn-values.yaml + images: + - cgr.dev/du-uds-defenseunicorns/vector:0.41.1