From ca9882adde155e94031e3f640b18e732919882ad Mon Sep 17 00:00:00 2001
From: Jacob Babor
Date: Tue, 9 Jul 2024 15:43:41 -0500
Subject: [PATCH] Move Prometheus master to its own container and deploy some scraping on each node

---
Review notes (text between the "---" line and the first diff is not part of
the commit message):

* What this patch does: removes the web/prometheus.yml task include from
  prod_web.yml and replaces it with a `prometheus` role (tasks, a restart
  handler, and a templated prometheus.yml); deploys node-exporter (host
  network, so its default port 9100) and cAdvisor (published on 9101) as
  containers from tags_nagios.yml; and allows one extra source IP on the
  proxy that fronts prometheus:9090.
* The scrape targets in prometheus.yml.j2 are generated from
  groups['tags_nagios'], matching the play that deploys the exporters.
* Changes made during review:
  - The cAdvisor task now carries `tags: [ prometheus ]` like the
    node-exporter task, so `--tags prometheus` deploys both exporters.
  - Both exporter tasks use `community.docker.docker_container`, the same
    FQCN the role's own container task uses.
  - The handler uses native block-style module arguments and true/false
    instead of an inline key=value string and `yes`.
  - Hunk headers and the diffstat were recomputed for the added lines.
* NOTE(review): the index blob hashes for tags_nagios.yml and
  handlers/main.yml are those of the original commit; regenerate the patch
  from the tree to refresh them.
* NOTE(review): the indentation of unchanged context lines was reconstructed
  by convention - confirm against the working tree before applying.

 playbooks/prod_web.yml                        |  5 +++-
 playbooks/tags_nagios.yml                     | 27 ++++++++++++++++++++
 roles/prometheus/handlers/main.yml            |  8 ++++++
 .../prometheus/tasks/main.yml                 | 17 +++++++-----
 roles/prometheus/templates/prometheus.yml.j2  | 26 +++++++++++++++++++
 5 files changed, 75 insertions(+), 8 deletions(-)
 create mode 100644 roles/prometheus/handlers/main.yml
 rename playbooks/tasks/web/prometheus.yml => roles/prometheus/tasks/main.yml (60%)
 create mode 100644 roles/prometheus/templates/prometheus.yml.j2

diff --git a/playbooks/prod_web.yml b/playbooks/prod_web.yml
index 6c758f5..f26d520 100755
--- a/playbooks/prod_web.yml
+++ b/playbooks/prod_web.yml
@@ -27,7 +27,6 @@
         - web/grafana.yml
         - web/netbox.yml
         - web/nextcloud.yml
-        - web/prometheus.yml
         - web/synapse.yml
         # Backend web services
         - web/prowlarr.yml
@@ -59,6 +58,8 @@
           - repo: https://git.desu.ltd/salt/gitea-custom
             dest: /data/gitea/data/gitea/custom
       tags: [ web, git ]
+    - role: prometheus
+      tags: [ prometheus, monitoring ]
     - role: nagios
       vars:
         # Definitions for contacts and checks are defined in inventory vars
@@ -132,6 +133,8 @@
               - "allow 10.0.0.0/8"
               - "allow 172.16.0.0/12"
               - "allow 192.168.0.0/16"
+              # TODO: Replace this with a dynamically-generated list of public IPs from inv
+              - "allow 45.79.58.44/32" # bastion1.dallas.mgmt.desu.ltd
               - "deny all"
             proxy_pass: http://prometheus:9090
         # desu.ltd media bullshit
diff --git a/playbooks/tags_nagios.yml b/playbooks/tags_nagios.yml
index 773622b..26b9ae1 100755
--- a/playbooks/tags_nagios.yml
+++ b/playbooks/tags_nagios.yml
@@ -35,6 +35,33 @@
         - /usr/local/bin/monitoring-scripts/check_docker
         - /usr/local/bin/monitoring-scripts/check_temp
       tags: [ nagios, sudo ]
+    - name: assure prometheus node exporter
+      # https://github.com/prometheus/node_exporter
+      community.docker.docker_container:
+        name: prometheus-node-exporter
+        image: quay.io/prometheus/node-exporter:latest
+        command:
+          - '--path.rootfs=/host'
+        network_mode: host
+        pid_mode: host
+        volumes:
+          - /:/host:ro,rslave
+      tags: [ prometheus ]
+    - name: assure prometheus cadvisor exporter
+      community.docker.docker_container:
+        name: prometheus-cadvisor-exporter
+        image: gcr.io/cadvisor/cadvisor:latest
+        ports:
+          - 9101:8080/tcp
+        volumes:
+          - /:/rootfs:ro
+          - /var/run:/var/run:ro
+          - /sys:/sys:ro
+          - /var/lib/docker:/var/lib/docker:ro
+          - /dev/disk:/dev/disk:ro
+        devices:
+          - /dev/kmsg
+      tags: [ prometheus ]
 - hosts: all
   gather_facts: no
   tasks:
diff --git a/roles/prometheus/handlers/main.yml b/roles/prometheus/handlers/main.yml
new file mode 100644
index 0000000..e06c855
--- /dev/null
+++ b/roles/prometheus/handlers/main.yml
@@ -0,0 +1,8 @@
+#!/usr/bin/env ansible-playbook
+# vim:ft=ansible:
+- name: restart prometheus container
+  community.docker.docker_container:
+    name: prometheus
+    state: started
+    restart: true
+  become: true
diff --git a/playbooks/tasks/web/prometheus.yml b/roles/prometheus/tasks/main.yml
similarity index 60%
rename from playbooks/tasks/web/prometheus.yml
rename to roles/prometheus/tasks/main.yml
index bd08cd7..0252da0 100644
--- a/playbooks/tasks/web/prometheus.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -9,13 +9,17 @@
   with_items:
     - /data/prometheus/config
     - /data/prometheus/data
-  tags: [ docker, prometheus, monitoring ]
+  notify: restart prometheus container
+- name: template out configuration file
+  ansible.builtin.template:
+    src: prometheus.yml.j2
+    owner: 5476
+    group: 5476
+    mode: "0640"
+    dest: /data/prometheus/config/prometheus.yml
+  notify: restart prometheus container
 - name: docker deploy prometheus
-  # NOTE: If you're rebuilding this, you *need* to sync the data over as this
-  # container will try to explode if you run it with empty volumes. Copy over
-  # prometheus.yml into the config volume first or steal it from an empty fresh
-  # container instance
-  docker_container:
+  community.docker.docker_container:
     name: prometheus
     image: prom/prometheus:latest
     user: 5476:5476
@@ -27,4 +31,3 @@
     volumes:
       - /data/prometheus/config:/etc/prometheus
       - /data/prometheus/data:/prometheus
-  tags: [ docker, prometheus, monitoring ]
diff --git a/roles/prometheus/templates/prometheus.yml.j2 b/roles/prometheus/templates/prometheus.yml.j2
new file mode 100644
index 0000000..47f26f6
--- /dev/null
+++ b/roles/prometheus/templates/prometheus.yml.j2
@@ -0,0 +1,26 @@
+# my global config
+---
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  # The job name is added as a label `job=` to any timeseries
+  # scraped from this config.
+  - job_name: "prometheus"
+    # metrics_path defaults to '/metrics'
+    # scheme defaults to 'http'.
+    static_configs:
+      - targets: ["localhost:9090"]
+  - job_name: "node-exporter"
+    static_configs:
+      - targets:
+{% for host in groups['tags_nagios'] %}
+          - '{{ host }}:9100'
+{% endfor %}
+  - job_name: "cadvisor-exporter"
+    static_configs:
+      - targets:
+{% for host in groups['tags_nagios'] %}
+          - '{{ host }}:9101'
+{% endfor %}