From bcb432cc4abeb6074279c5e5e5df9532772b1fde Mon Sep 17 00:00:00 2001
From: Vitaly
Date: Thu, 7 Sep 2023 15:16:44 +0800
Subject: [PATCH 1/6] Make grafana_agent Idempotent

Remove forced stop
Add handler
---
Reviewer note: the include_tasks paths in this series were corrected from
ga-started.yml to ga-started.yaml so they match the task file created below,
and the journalctl task uses ansible.builtin.command instead of shell. The
blob ids on the index lines predate these corrections.

 roles/grafana_agent/handlers/main.yaml    | 11 +++++++++
 roles/grafana_agent/tasks/configure.yaml  | 22 ++++--------------
 roles/grafana_agent/tasks/ga-started.yaml | 28 +++++++++++++++++++++++
 roles/grafana_agent/tasks/main.yaml       |  5 ++++
 4 files changed, 48 insertions(+), 18 deletions(-)
 create mode 100644 roles/grafana_agent/handlers/main.yaml
 create mode 100644 roles/grafana_agent/tasks/ga-started.yaml

diff --git a/roles/grafana_agent/handlers/main.yaml b/roles/grafana_agent/handlers/main.yaml
new file mode 100644
index 00000000..3a7566a7
--- /dev/null
+++ b/roles/grafana_agent/handlers/main.yaml
@@ -0,0 +1,11 @@
+---
+- name: Restart Grafana Agent
+  ansible.builtin.service:
+    name: grafana-agent
+    state: restarted
+    daemon_reload: true
+  listen: "restart grafana-agent"
+
+- name: Check Grafana Agent is started properly
+  ansible.builtin.include_tasks: ga-started.yaml
+  listen: "restart grafana-agent"
diff --git a/roles/grafana_agent/tasks/configure.yaml b/roles/grafana_agent/tasks/configure.yaml
index 8e066120..123c922a 100644
--- a/roles/grafana_agent/tasks/configure.yaml
+++ b/roles/grafana_agent/tasks/configure.yaml
@@ -1,22 +1,4 @@
 ---
-- name: Stop grafana-agent if installed
-  block:
-    # this will fail the verify first time of installation if local binary is used
-    - name: Ensure grafana-agent is stopped
-      ansible.builtin.systemd:
-        name: grafana-agent
-        enabled: true
-        state: stopped
-  rescue:
-    # make sure that the service isn't actually installed
-    - name: Get the list of services
-      ansible.builtin.service_facts:
-
-    - name: Verify that grafana-agent is not installed
-      ansible.builtin.assert:
-        that:
-          - _grafana_agent_systemd_unit not in services
-
 # these tasks are ran in both install and configure, as directories could have changed
 - name: Configure directories
   ansible.builtin.import_tasks: install/directories.yaml
@@ -36,6 +18,7 @@
     owner: root
     group: root
     mode: 0644
+  notify: "restart grafana-agent"
 
 - name: Create the Service Environment file
   ansible.builtin.template:
@@ -44,6 +27,7 @@
     owner: root
     group: "{{ grafana_agent_user_group }}"
     mode: 0640
+  notify: "restart grafana-agent"
 
 - name: Create Grafana Agent config
   ansible.builtin.template:
@@ -53,6 +37,7 @@
     owner: root
     group: "{{ grafana_agent_user_group }}"
     mode: 0640
+  notify: "restart grafana-agent"
   when: grafana_agent_provisioned_config_file | length == 0
 
 - name: Copy Grafana Agent config
@@ -62,4 +47,5 @@
     owner: root
     group: "{{ grafana_agent_user_group }}"
     mode: 0640
+  notify: "restart grafana-agent"
   when: grafana_agent_provisioned_config_file | length > 0
diff --git a/roles/grafana_agent/tasks/ga-started.yaml b/roles/grafana_agent/tasks/ga-started.yaml
new file mode 100644
index 00000000..37b9eadb
--- /dev/null
+++ b/roles/grafana_agent/tasks/ga-started.yaml
@@ -0,0 +1,28 @@
+---
+- name: Health check Grafana Agent
+  ansible.builtin.uri:
+    url: "http://127.0.0.1:12345/-/ready"
+    follow_redirects: none
+    method: GET
+  register: _result
+  failed_when: false
+  until: _result.status == 200
+  retries: 3
+  delay: 2
+  changed_when: false
+
+- name: Check system logs if Grafana Agent is not started
+  when: _result.status != 200
+  block:
+    - name: Run journalctl
+      ansible.builtin.command:
+        cmd: "journalctl -u grafana-agent -b -n20 --no-pager"
+      register: journal_ret
+      changed_when: false
+    - name: Output Grafana agent logs
+      ansible.builtin.debug:
+        var: journal_ret.stdout_lines
+    - name: Rise alerts
+      ansible.builtin.assert:
+        that: false
+        fail_msg: "Service grafana-agent hasn't started."
diff --git a/roles/grafana_agent/tasks/main.yaml b/roles/grafana_agent/tasks/main.yaml
index c42a5344..a31b484e 100644
--- a/roles/grafana_agent/tasks/main.yaml
+++ b/roles/grafana_agent/tasks/main.yaml
@@ -40,7 +40,12 @@
     name: grafana-agent
     enabled: true
     state: started
+  failed_when: false
+  changed_when: false
   tags:
     - grafana_agent_install
     - grafana_agent_configure
     - grafana_agent_run
+
+- name: Check Grafana Agent is started properly
+  ansible.builtin.include_tasks: ga-started.yaml

From b8645176288b51663973a5bd1e9a39d54b1418c7 Mon Sep 17 00:00:00 2001
From: Vitaly
Date: Fri, 8 Sep 2023 17:48:54 +0800
Subject: [PATCH 2/6] Add grafana_agent_health_check endpoint

---
 roles/grafana_agent/tasks/ga-started.yaml | 2 +-
 roles/grafana_agent/vars/main.yaml        | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/roles/grafana_agent/tasks/ga-started.yaml b/roles/grafana_agent/tasks/ga-started.yaml
index 37b9eadb..476217b7 100644
--- a/roles/grafana_agent/tasks/ga-started.yaml
+++ b/roles/grafana_agent/tasks/ga-started.yaml
@@ -1,7 +1,7 @@
 ---
 - name: Health check Grafana Agent
   ansible.builtin.uri:
-    url: "http://127.0.0.1:12345/-/ready"
+    url: "{{ _grafana_agent_healthcheck_endpoint }}"
     follow_redirects: none
     method: GET
   register: _result
diff --git a/roles/grafana_agent/vars/main.yaml b/roles/grafana_agent/vars/main.yaml
index 0eba3cb8..7808ba28 100644
--- a/roles/grafana_agent/vars/main.yaml
+++ b/roles/grafana_agent/vars/main.yaml
@@ -31,3 +31,6 @@ _grafana_agent_download_binary_file: "grafana-agent-{{ _grafana_agent_os_family
 # systemd info
 _grafana_agent_systemd_dir: /lib/systemd/system/
 _grafana_agent_systemd_unit: grafana-agent.service
+
+# Server http address, used in self health check after start
+_grafana_agent_healthcheck_endpoint: "http://{{ grafana_agent_flags_extra.server.http.address if ((grafana_agent_flags_extra.server|default({})).http|default({})).http.address is defined else '127.0.0.1:12345' }}/-/ready"

From 0ce04de5ce4c7dee62146057c3529def0d83f36e Mon Sep 17 00:00:00 2001
From: Vitaly
Date: Fri, 8 Sep 2023 19:46:32 +0800
Subject: [PATCH 3/6] Add become: true in restart

---
 roles/grafana_agent/handlers/main.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/roles/grafana_agent/handlers/main.yaml b/roles/grafana_agent/handlers/main.yaml
index 3a7566a7..46265068 100644
--- a/roles/grafana_agent/handlers/main.yaml
+++ b/roles/grafana_agent/handlers/main.yaml
@@ -1,5 +1,6 @@
 ---
 - name: Restart Grafana Agent
+  become: true
   ansible.builtin.service:
     name: grafana-agent
     state: restarted

From 2cafce7c1b42ad7798e397f0f58bf88b88ff96ef Mon Sep 17 00:00:00 2001
From: Vitaly
Date: Fri, 8 Sep 2023 19:47:07 +0800
Subject: [PATCH 4/6] Fix condition

---
 roles/grafana_agent/vars/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/roles/grafana_agent/vars/main.yaml b/roles/grafana_agent/vars/main.yaml
index 7808ba28..c3094674 100644
--- a/roles/grafana_agent/vars/main.yaml
+++ b/roles/grafana_agent/vars/main.yaml
@@ -33,4 +33,4 @@ _grafana_agent_systemd_dir: /lib/systemd/system/
 _grafana_agent_systemd_unit: grafana-agent.service
 
 # Server http address, used in self health check after start
-_grafana_agent_healthcheck_endpoint: "http://{{ grafana_agent_flags_extra.server.http.address if ((grafana_agent_flags_extra.server|default({})).http|default({})).http.address is defined else '127.0.0.1:12345' }}/-/ready"
+_grafana_agent_healthcheck_endpoint: "http://{{ grafana_agent_flags_extra['server.http.address'] if grafana_agent_flags_extra['server.http.address'] is defined else '127.0.0.1:12345' }}/-/ready"

From 61eee6efa9d60418ce40ad2e2ac9488bd92c3bbf Mon Sep 17 00:00:00 2001
From: Vitaly
Date: Fri, 8 Sep 2023 19:47:29 +0800
Subject: [PATCH 5/6] Increase delay

---
 roles/grafana_agent/tasks/ga-started.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/roles/grafana_agent/tasks/ga-started.yaml b/roles/grafana_agent/tasks/ga-started.yaml
index 476217b7..01106f42 100644
--- a/roles/grafana_agent/tasks/ga-started.yaml
+++ b/roles/grafana_agent/tasks/ga-started.yaml
@@ -8,7 +8,7 @@
   failed_when: false
   until: _result.status == 200
   retries: 3
-  delay: 2
+  delay: 5
   changed_when: false
 
 - name: Check system logs if Grafana Agent is not started

From ef257c46941d1672bba1c4dc71df4cc7beee1f05 Mon Sep 17 00:00:00 2001
From: Vitaly
Date: Fri, 8 Sep 2023 19:48:13 +0800
Subject: [PATCH 6/6] Remove check on end (keep check only after restart)

---
 roles/grafana_agent/tasks/main.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/roles/grafana_agent/tasks/main.yaml b/roles/grafana_agent/tasks/main.yaml
index a31b484e..9f3f51cc 100644
--- a/roles/grafana_agent/tasks/main.yaml
+++ b/roles/grafana_agent/tasks/main.yaml
@@ -46,6 +46,3 @@
     - grafana_agent_install
     - grafana_agent_configure
     - grafana_agent_run
-
-- name: Check Grafana Agent is started properly
-  ansible.builtin.include_tasks: ga-started.yaml