Move Prometheus master to its own container and deploy some scraping on each node

This commit is contained in:
Salt 2024-07-09 15:43:41 -05:00
parent e63898f328
commit ca9882adde
5 changed files with 71 additions and 8 deletions

View File

@ -27,7 +27,6 @@
- web/grafana.yml - web/grafana.yml
- web/netbox.yml - web/netbox.yml
- web/nextcloud.yml - web/nextcloud.yml
- web/prometheus.yml
- web/synapse.yml - web/synapse.yml
# Backend web services # Backend web services
- web/prowlarr.yml - web/prowlarr.yml
@ -59,6 +58,8 @@
- repo: https://git.desu.ltd/salt/gitea-custom - repo: https://git.desu.ltd/salt/gitea-custom
dest: /data/gitea/data/gitea/custom dest: /data/gitea/data/gitea/custom
tags: [ web, git ] tags: [ web, git ]
- role: prometheus
tags: [ prometheus, monitoring ]
- role: nagios - role: nagios
vars: vars:
# Definitions for contacts and checks are defined in inventory vars # Definitions for contacts and checks are defined in inventory vars
@ -132,6 +133,8 @@
- "allow 10.0.0.0/8" - "allow 10.0.0.0/8"
- "allow 172.16.0.0/12" - "allow 172.16.0.0/12"
- "allow 192.168.0.0/16" - "allow 192.168.0.0/16"
# TODO: Replace this with a dynamically-generated list of public IPs from inv
- "allow 45.79.58.44/32" # bastion1.dallas.mgmt.desu.ltd
- "deny all" - "deny all"
proxy_pass: http://prometheus:9090 proxy_pass: http://prometheus:9090
# desu.ltd media bullshit # desu.ltd media bullshit

View File

@ -35,6 +35,32 @@
- /usr/local/bin/monitoring-scripts/check_docker - /usr/local/bin/monitoring-scripts/check_docker
- /usr/local/bin/monitoring-scripts/check_temp - /usr/local/bin/monitoring-scripts/check_temp
tags: [ nagios, sudo ] tags: [ nagios, sudo ]
# Deploy the Prometheus node exporter so this host reports host-level
# metrics (CPU, memory, filesystem) on port 9100.
# https://github.com/prometheus/node_exporter
- name: assure prometheus node exporter
  # FQCN fix: docker_container is provided by the community.docker
  # collection, not ansible.builtin -- the original FQCN fails module
  # resolution. Matches the community.docker usage elsewhere in this repo.
  community.docker.docker_container:
    name: prometheus-node-exporter
    image: quay.io/prometheus/node-exporter:latest
    command:
      - '--path.rootfs=/host'
    # Host network and PID namespaces so the exporter observes the real
    # host rather than the container's own namespaces
    network_mode: host
    pid_mode: host
    volumes:
      # Whole host filesystem, read-only; rslave propagates host mounts
      - /:/host:ro,rslave
  tags: [ prometheus ]
# Deploy cAdvisor to expose per-container metrics, published on host
# port 9101 (cAdvisor itself listens on 8080 inside the container).
# https://github.com/google/cadvisor
- name: assure prometheus cadvisor exporter
  # FQCN fix: docker_container is provided by the community.docker
  # collection, not ansible.builtin
  community.docker.docker_container:
    name: prometheus-cadvisor-exporter
    image: gcr.io/cadvisor/cadvisor:latest
    ports:
      # Quoted so the port mapping is an unambiguous YAML string
      - "9101:8080/tcp"
    # Standard cAdvisor bind mounts: read-only views of the host root,
    # runtime state, sysfs, and the Docker data directory
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    devices:
      - /dev/kmsg
  # Tagged like the node-exporter task above so a `--tags prometheus`
  # run deploys both exporters; the original task had no tags and would
  # be silently skipped by tag-limited runs.
  tags: [ prometheus ]
- hosts: all - hosts: all
gather_facts: no gather_facts: no
tasks: tasks:

View File

@ -0,0 +1,5 @@
#!/usr/bin/env ansible-playbook
# vim:ft=ansible:
# Handler: restart the Prometheus container whenever its config or data
# directories change (notified by the prometheus role's tasks).
- name: restart prometheus container
  # Native YAML module args instead of the legacy `key=value` inline
  # string form, and the community.docker FQCN used elsewhere in this
  # repo. `restart: true` forces a bounce even if already started.
  community.docker.docker_container:
    name: prometheus
    state: started
    restart: true
  become: true

View File

@ -9,13 +9,17 @@
with_items: with_items:
- /data/prometheus/config - /data/prometheus/config
- /data/prometheus/data - /data/prometheus/data
tags: [ docker, prometheus, monitoring ] notify: restart prometheus container
- name: template out configuration file
ansible.builtin.template:
src: prometheus.yml.j2
owner: 5476
group: 5476
mode: "0640"
dest: /data/prometheus/config/prometheus.yml
notify: restart prometheus container
- name: docker deploy prometheus - name: docker deploy prometheus
# NOTE: If you're rebuilding this, you *need* to sync the data over as this community.docker.docker_container:
# container will try to explode if you run it with empty volumes. Copy over
# prometheus.yml into the config volume first or steal it from an empty fresh
# container instance
docker_container:
name: prometheus name: prometheus
image: prom/prometheus:latest image: prom/prometheus:latest
user: 5476:5476 user: 5476:5476
@ -27,4 +31,3 @@
volumes: volumes:
- /data/prometheus/config:/etc/prometheus - /data/prometheus/config:/etc/prometheus
- /data/prometheus/data:/prometheus - /data/prometheus/data:/prometheus
tags: [ docker, prometheus, monitoring ]

View File

@ -0,0 +1,26 @@
# Prometheus server configuration, templated out by the prometheus role.
---
global:
  # Scrape targets and evaluate rules every 15 seconds
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # The job name is attached as a `job=<job_name>` label to every
  # timeseries scraped from that job. metrics_path defaults to
  # '/metrics' and scheme defaults to 'http'.

  # Prometheus scrapes its own metrics endpoint
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  # Host-level metrics from node-exporter on every monitored host
  - job_name: "node-exporter"
    static_configs:
      - targets:
{% for host in groups['tags_nagios'] %}
          - '{{ host }}:9100'
{% endfor %}

  # Per-container metrics from cAdvisor on every monitored host
  - job_name: "cadvisor-exporter"
    static_configs:
      - targets:
{% for host in groups['tags_nagios'] %}
          - '{{ host }}:9101'
{% endfor %}