Salt
37150bf7d1
Apparently it's completely normal behavior for this service to be not running on a fresh boot
105 lines
5.1 KiB
YAML
105 lines
5.1 KiB
YAML
# vim:ft=ansible:
|
|
# Make sure the host-side base directory for the Nagios data exists.
# NOTE(review): only /data/nagios itself is created here; later tasks in this
# file write beneath /data/nagios/etc — confirm that tree exists on a fresh host.
- name: assure data directory for nagios
  file:
    path: /data/nagios
    state: directory
    # Quoted so YAML keeps the mode as a string instead of parsing a number.
    mode: "0755"
  tags: [ nagios ]
|
|
# Render nagios-ansible.cfg.j2 to the Nagios objects directory on the host.
# The command and service definitions the template consumes are supplied as
# task-level vars below. The task result is registered as `config` so a later
# task can tell whether the rendered file changed.
- name: template out config for nagios
  template:
    src: nagios-ansible.cfg.j2
    dest: /data/nagios/etc/objects/ansible.cfg
    owner: root
    group: root
    # Quoted so YAML keeps the mode as a string instead of parsing a number.
    mode: "0644"
  vars:
    # Command definitions: each entry has a `name` and the `command` line.
    nagios_commands:
      # This command is included in the container image
      - name: check_nrpe
        command: "$USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$"
      # Runs the command given as $ARG1$ on the target host over SSH as the
      # nagios-checker user.
      - name: check_by_ssh
        command: "$USER1$/check_by_ssh -H $HOSTADDRESS$ -F /opt/nagios/etc/ssh_config -t 30 -q -i /opt/nagios/etc/id_ed25519 -l nagios-checker -C \"$ARG1$\""
    # Service definitions: each entry has a `name`, a `command` (Nagios
    # `command!arg` form) and, for some entries, a `hostgroup`.
    # NOTE(review): how entries without a `hostgroup` are assigned is decided
    # by the template, which is not visible here — confirm.
    nagios_services:
      # Agentless checks
      - name: HTTP
        command: check_http
        hostgroup: nagios-checkhttp
      - name: HTTPS
        command: check_http!--ssl
        hostgroup: nagios-checkhttp
      - name: SSH
        command: check_ssh
      # check_by_ssh checks
      - name: CPU Load
        command: check_by_ssh!/usr/lib/nagios/plugins/check_load -r -w 0.8,0.8,0.8 -c 1.0,0.9,0.9
      - name: Disk Usage
        command: check_by_ssh!/usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/'
      - name: Reboot Required
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
      - name: Unit atd.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit atd.service
      - name: Unit backup.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
      - name: Unit backup.timer
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
      - name: Unit cron.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit cron.service
      - name: Unit dbus.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit dbus.service
      - name: Unit docker.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit docker.service
      - name: Unit ssh.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ssh.service
      - name: Unit systemd-resolved.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit systemd-resolved.service
      - name: Users
        command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
      # Tag-specific checks
      # ansible-pull
      - name: Unit ansible-pull.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.service
        hostgroup: ansible-pull
      - name: Unit ansible-pull.timer
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.timer
        hostgroup: ansible-pull
      # docker
      # Strictly speaking not a tag, but it's best to keep it separated
      # TODO: Figure out how I'm going to implement Docker checks
      # nagios-checkpgsql
      # NOTE(review): the monitoring password is interpolated unquoted into the
      # remote command line — confirm it never contains shell metacharacters.
      - name: PSQL
        command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
        hostgroup: nagios-checkpgsql
      # Ratio of rows in pg_stat_activity to the max_connections setting.
      - name: PSQL Connections
        command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5 -q 'select (select count(*)::float used from pg_stat_activity) / (select setting::int max_conn from pg_settings where name=\\$\\$max_connections\\$\\$)' -W 0.5-0.7 -C 0.7-1.0"
        hostgroup: nagios-checkpgsql
      - name: Unit postgresql.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit postgresql.service
        hostgroup: nagios-checkpgsql
      # nagios-nrpeswap
      - name: Swap Usage
        command: check_by_ssh!/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
        hostgroup: nagios-nrpeswap
      # zerotier
      - name: Unit zerotier-one.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit zerotier-one.service
        hostgroup: zerotier
  register: config
  tags: [ nagios, template ]
|
|
# Ensure the main Nagios configuration includes the rendered objects file.
# The cfg_file value is the in-container path: the docker deploy task in this
# file mounts /data/nagios/etc at /opt/nagios/etc.
- name: assure config file is loaded
  lineinfile:
    path: /data/nagios/etc/nagios.cfg
    line: 'cfg_file=/opt/nagios/etc/objects/ansible.cfg'
  tags: [ nagios, template ]
|
|
# Deploy the Nagios container from the jasonrivers/nagios image, attach it to
# the `web` network under the alias `nagios`, and bind-mount the host
# directories under /data/nagios into the container.
- name: docker deploy nagios
  docker_container:
    name: nagios
    image: jasonrivers/nagios
    env:
      NAGIOSADMIN_USER: admin
      # Admin password comes from a secret variable defined outside this file.
      NAGIOSADMIN_PASS: '{{ secret_nagios_admin_pass }}'
      NAGIOS_TIMEZONE: America/Chicago
    networks:
      - name: web
        aliases:
          - nagios
    # host path : container path
    volumes:
      - /data/nagios/etc:/opt/nagios/etc
      - /data/nagios/var:/opt/nagios/var
      - /data/nagios/plugins:/opt/Custom-Nagios-Plugins
      - /data/nagios/nagiosgraph/var:/opt/nagiosgraph/var
      - /data/nagios/nagiosgraph/etc:/opt/nagiosgraph/etc
  tags:
    - docker
    - nagios
|
|
# Restart the Nagios container when the rendered object configuration changed.
- name: restart nagios
  docker_container:
    name: nagios
    state: started
    restart: true
  # `config` is registered by the template task, which does not carry the
  # `docker` tag. A run limited to `--tags docker` never registers it, so guard
  # with `is defined`; a bare `config and ...` fails on the undefined variable.
  when: config is defined and config is changed
  tags: [ docker, nagios ]
|