# vim:ft=ansible:
---
# Tasks that render the Ansible-managed Nagios object configuration and run
# Nagios in a Docker container with its state kept under /data/nagios.

# Create the host-side directory that holds the container's bind mounts.
- name: assure data directory for nagios
  file:
    path: /data/nagios
    state: directory
    mode: "0755"
  tags: [nagios]

# Render contacts, commands and services (the vars below) into a single
# Nagios object file. The result is registered as `config` so the restart
# task at the bottom only fires when the rendered file changed.
# NOTE(review): the PSQL checks embed secret_postgresql_monitoring_password
# in their command strings; presumably those end up in this 0644 file --
# confirm that is acceptable.
- name: template out config for nagios
  template:
    src: nagios-ansible.cfg.j2
    dest: /data/nagios/etc/objects/ansible.cfg
    owner: root
    group: root
    mode: "0644"
  vars:
    nagios_contacts:
      - name: salt
        host_notification_commands: notify-host-by-email
        service_notification_commands: notify-service-by-email
        extra:
          - key: email
            value: rehashedsalt@cock.li
    nagios_commands:
      # This command is included in the container image
      - name: check_nrpe
        command: "$USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$"
      - name: check_by_ssh
        command: "$USER1$/check_by_ssh -H $HOSTADDRESS$ -F /opt/nagios/etc/ssh_config -t 30 -q -i /opt/nagios/etc/id_ed25519 -l nagios-checker -C \"$ARG1$\""
    nagios_services:
      # Agentless checks
      - name: HTTP
        command: check_http
        hostgroup: tag-nagios-checkhttp
      - name: HTTPS
        command: check_http!--ssl
        hostgroup: tag-nagios-checkhttp
      - name: SSH
        command: check_ssh
      # check_by_ssh checks
      - name: CPU Load
        command: check_by_ssh!/usr/lib/nagios/plugins/check_load -r -w 5,4,3 -c 7,6,5
      - name: CPU Utilization
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_cpu_stats -w 75 -c 90
      - name: Disk Usage
        command: check_by_ssh!/usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/'
      - name: DNS Resolution
        command: check_by_ssh!/usr/lib/nagios/plugins/check_etc_resolv
      - name: Memory Usage
        command: check_by_ssh!/usr/lib/nagios/plugins/check_memory -w 20% -c 10%
      - name: Package Updates
        command: check_by_ssh!/usr/lib/nagios/plugins/check_packages
        extra:
          - key: notification_options
            value: c,r
      - name: Ping Self over DNS
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_ping_by_hostname
      - name: Reboot Required
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
      - name: Unit atd.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit atd.service
      - name: Unit backup.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
        hostgroup: "!role-hypervisor"
      - name: Unit backup.timer
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
        hostgroup: "!role-hypervisor"
      - name: Unit cron.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit cron.service
      - name: Unit dbus.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit dbus.service
      - name: Unit docker.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit docker.service
        hostgroup: "!tag-no-docker"
      - name: Unit ssh.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ssh.service
      - name: Unit systemd-resolved.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit systemd-resolved.service
        hostgroup: "!role-hypervisor"
      - name: Users
        command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
      # Tag-specific checks
      # ansible-pull
      - name: Unit ansible-pull.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.service
        hostgroup: tag-ansible-pull
      - name: Unit ansible-pull.timer
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ansible-pull.timer
        hostgroup: tag-ansible-pull
      # docker
      # Strictly speaking not a tag, but it's best to keep it separated
      # TODO: Figure out how I'm going to implement Docker checks
      # nagios-checkpgsql
      - name: PSQL
        command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
        hostgroup: tag-nagios-checkpgsql
      - name: PSQL Connections
        command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5 -q 'select (select count(*)::float used from pg_stat_activity) / (select setting::int max_conn from pg_settings where name=\\$\\$max_connections\\$\\$)' -W 0.7-0.8 -C 0.8-1.0"
        hostgroup: tag-nagios-checkpgsql
      - name: Unit postgresql.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit postgresql.service
        hostgroup: tag-nagios-checkpgsql
      # nagios-checkswap
      - name: Swap Usage
        command: check_by_ssh!/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
        hostgroup: tag-nagios-checkswap
      # zerotier
      - name: Unit zerotier-one.service
        command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit zerotier-one.service
        hostgroup: tag-zerotier
  register: config
  tags: [nagios, template]

# Make the main Nagios config include the generated object file. The path in
# `line` is the container-side path: /data/nagios/etc is mounted at
# /opt/nagios/etc by the docker task below.
- name: assure config file is loaded
  lineinfile:
    path: /data/nagios/etc/nagios.cfg
    line: 'cfg_file=/opt/nagios/etc/objects/ansible.cfg'
  tags: [nagios, template]

# Run the Nagios container on the `web` network with its configuration, state
# and graph data bind-mounted from /data/nagios.
- name: docker deploy nagios
  docker_container:
    name: nagios
    image: jasonrivers/nagios
    env:
      NAGIOSADMIN_USER: admin
      NAGIOSADMIN_PASS: "{{ secret_nagios_admin_pass }}"
      NAGIOS_TIMEZONE: "America/Chicago"
    networks:
      - name: web
        aliases:
          - nagios
    volumes:
      - /data/nagios/etc:/opt/nagios/etc
      - /data/nagios/var:/opt/nagios/var
      - /data/nagios/plugins:/opt/Custom-Nagios-Plugins
      - /data/nagios/nagiosgraph/var:/opt/nagiosgraph/var
      - /data/nagios/nagiosgraph/etc:/opt/nagiosgraph/etc
      # /dev/null is mounted over the NSCA binaries; presumably this is meant
      # to disable NSCA inside the container -- TODO confirm.
      - /dev/null:/opt/nagios/bin/nsca
      - /dev/null:/opt/nagios/bin/send_nsca
  tags: [docker, nagios]

# Restart the container only when the rendered object file changed.
# `config` is registered by a task tagged [nagios, template]; when this task
# is selected without it (e.g. `--tags docker`) the variable does not exist,
# so guard with `is defined` instead of failing on an undefined variable.
- name: restart nagios
  docker_container:
    name: nagios
    state: started
    restart: true
  when: config is defined and config is changed
  tags: [docker, nagios]