Compare commits
No commits in common. "f4117b46f8e301fe701892dee3ff7b1d09edf2e2" and "e9971f304e51fde24ff22de5a13b1332de571446" have entirely different histories.
f4117b46f8
...
e9971f304e
@ -202,20 +202,119 @@ nagios_commands:
|
|||||||
- name: notify-service-by-matrix
|
- name: notify-service-by-matrix
|
||||||
command: "/usr/bin/printf \"%b\" \"$NOTIFICATIONTYPE$\\nService $HOSTALIAS$ - $SERVICEDESC$ is $SERVICESTATE$\\nInfo: $SERVICEOUTPUT$\\nDate/Time: $LONGDATETIME$\" | /opt/Custom-Nagios-Plugins/notify-by-matrix"
|
command: "/usr/bin/printf \"%b\" \"$NOTIFICATIONTYPE$\\nService $HOSTALIAS$ - $SERVICEDESC$ is $SERVICESTATE$\\nInfo: $SERVICEOUTPUT$\\nDate/Time: $LONGDATETIME$\" | /opt/Custom-Nagios-Plugins/notify-by-matrix"
|
||||||
nagios_services:
|
nagios_services:
|
||||||
|
# Agentless checks
|
||||||
|
- name: HTTP
|
||||||
|
command: check_http
|
||||||
|
hostgroup: tag-nagios-checkhttp
|
||||||
|
- name: HTTPS
|
||||||
|
command: check_http!--ssl
|
||||||
|
hostgroup: tag-nagios-checkhttp
|
||||||
- name: SSH
|
- name: SSH
|
||||||
command: check_ssh
|
command: check_ssh
|
||||||
# check_by_ssh checks
|
# check_by_ssh checks
|
||||||
|
- name: CPU Utilization
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_cpu_stats -w 75 -c 90
|
||||||
|
- name: DNS Resolution
|
||||||
|
command: check_by_ssh!/usr/lib/nagios/plugins/check_etc_resolv
|
||||||
|
- name: Executables in tmp
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_executables_in_tmp
|
||||||
- name: Last Ansible Play
|
- name: Last Ansible Play
|
||||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_file_age /var/lib/ansible-last-run -w 432000 -c 604800
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_file_age /var/lib/ansible-last-run -w 432000 -c 604800
|
||||||
|
- name: Memory Usage
|
||||||
|
command: check_by_ssh!/usr/lib/nagios/plugins/check_memory -w 10% -c 5%
|
||||||
|
hostgroup: "ansible,!tag-prov-zfs"
|
||||||
|
- name: Ping Self over DNS
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_ping_by_hostname
|
||||||
- name: Reboot Required
|
- name: Reboot Required
|
||||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_reboot_required
|
||||||
|
- name: Unit atd.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit atd.service
|
||||||
- name: Unit backup.service
|
- name: Unit backup.service
|
||||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.service
|
||||||
hostgroup: "ansible,!role-hypervisor"
|
hostgroup: "ansible,!role-hypervisor"
|
||||||
- name: Unit backup.timer
|
- name: Unit backup.timer
|
||||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit backup.timer
|
||||||
hostgroup: "ansible,!role-hypervisor"
|
hostgroup: "ansible,!role-hypervisor"
|
||||||
|
- name: Unit cron.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit cron.service
|
||||||
|
- name: Unit dbus.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit dbus.service
|
||||||
|
- name: Unit ssh.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit ssh.service
|
||||||
|
- name: Unit systemd-resolved.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit systemd-resolved.service
|
||||||
|
hostgroup: "ansible,!role-hypervisor"
|
||||||
|
- name: Users
|
||||||
|
command: check_by_ssh!/usr/lib/nagios/plugins/check_users -w 3 -c 5
|
||||||
|
# Privileged checks
|
||||||
|
# Required because check_disk may attempt to get the free space of
|
||||||
|
# restricted mountpoints
|
||||||
|
- name: Disk Usage
|
||||||
|
command: check_by_ssh!/usr/bin/sudo /usr/lib/nagios/plugins/check_disk -M -u GB -X nfs -X tracefs -X cgroup -X tmpfs -X overlay -X shm -w 15% -c 10% -W 15% -K 10% -A -I '^/run/' -I '^udev$' -I '^/var/lib/kubelet/' -I '^/tmp/.mount_' -I '^/dev/loop'
|
||||||
|
# Device type checks
|
||||||
|
# R720
|
||||||
|
- name: CPU0 Temperature
|
||||||
|
command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -n -w 65 -c 75 --sensor coretemp-isa-0000
|
||||||
|
hostgroup: device-type-r720
|
||||||
|
- name: CPU1 Temperature
|
||||||
|
command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -n -w 65 -c 75 --sensor coretemp-isa-0001
|
||||||
|
hostgroup: device-type-r720
|
||||||
|
# Pi 4 4G
|
||||||
|
- name: CPU Temperature
|
||||||
|
command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_temp -n -w 65 -c 75 --sensor cpu_thermal-virtual-0
|
||||||
|
hostgroup: device-type-pi4b-2g,device-type-pi4b-4g,device-type-pi4b-4g-storage
|
||||||
|
# Device role checks
|
||||||
|
# hypervisor (which is assumed to be Proxmox)
|
||||||
|
- name: PVE Unit pve-firewall.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pve-firewall.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit spiceproxy.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit spiceproxy.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit pve-ha-crm.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pve-ha-crm.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit pvedaemon.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pvedaemon.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit pvefw-logger.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pvefw-logger.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit pveproxy.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pveproxy.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit pve-cluster.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pve-cluster.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
|
- name: PVE Unit pvestatd.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit pvestatd.service
|
||||||
|
hostgroup: role-hypervisor
|
||||||
# Tag-specific checks
|
# Tag-specific checks
|
||||||
|
# docker
|
||||||
|
- name: Unit docker.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit docker.service
|
||||||
|
hostgroup: "ansible,!tag-no-docker"
|
||||||
|
- name: Docker Status
|
||||||
|
command: check_by_ssh!/usr/bin/sudo /usr/local/bin/monitoring-scripts/check_docker --no-ok --status running
|
||||||
|
hostgroup: tag-nagios-checkdocker
|
||||||
|
# nagios-checkpgsql
|
||||||
|
- name: PSQL
|
||||||
|
command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5"
|
||||||
|
hostgroup: tag-nagios-checkpgsql
|
||||||
|
- name: PSQL Connections
|
||||||
|
command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5 -q 'select (select count(*)::float used from pg_stat_activity) / (select setting::int max_conn from pg_settings where name=\\$\\$max_connections\\$\\$)' -W 0.7-0.8 -C 0.8-1.0"
|
||||||
|
hostgroup: tag-nagios-checkpgsql
|
||||||
|
# https://rhaas.blogspot.com/2020/02/useless-vacuuming.html
|
||||||
|
- name: PSQL Old Xacts
|
||||||
|
command: "check_by_ssh!/usr/lib/nagios/plugins/check_pgsql -H localhost -l nagios -p {{ secret_postgresql_monitoring_password }} -w 2 -c 5 -q 'select count(*)::float from pg_prepared_xacts where age(transaction) > 5000000' -W 500-1000 -C 1000-1000000"
|
||||||
|
hostgroup: tag-nagios-checkpgsql
|
||||||
|
- name: Unit postgresql.service
|
||||||
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit postgresql.service
|
||||||
|
hostgroup: tag-nagios-checkpgsql
|
||||||
|
# nagios-checkswap
|
||||||
|
- name: Swap Usage
|
||||||
|
command: check_by_ssh!/usr/lib/nagios/plugins/check_swap -w 20% -c 10%
|
||||||
|
hostgroup: tag-nagios-checkswap
|
||||||
# zerotier
|
# zerotier
|
||||||
- name: Unit zerotier-one.service
|
- name: Unit zerotier-one.service
|
||||||
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit zerotier-one.service
|
command: check_by_ssh!/usr/local/bin/monitoring-scripts/check_systemd_unit zerotier-one.service
|
||||||
|
@ -149,5 +149,70 @@ define host {
|
|||||||
|
|
||||||
contact_groups ansible
|
contact_groups ansible
|
||||||
}
|
}
|
||||||
|
{% for service in vars.services %}
|
||||||
|
{% for tag in service.tags %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkmatrix" %}
|
||||||
|
{% for port in service.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description Matrix Synapse - {{ service.name }} - {{ port }}
|
||||||
|
check_command check_http!--ssl -H {{ service.name }} -u https://{{ service.name }}/health -s OK -p {{ port }} -f sticky
|
||||||
|
host_name {{ host }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkminecraft" %}
|
||||||
|
{% for port in service.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description Minecraft - {{ service.name }} - {{ port }}
|
||||||
|
check_command check_by_ssh!/usr/local/bin/monitoring-scripts/check_minecraft -H {{ host }} -p {{ port }} -m "{{ service.description }}" -f -w 3 -c 5
|
||||||
|
host_name {{ host }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkhttp" %}
|
||||||
|
{% for port in service.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description HTTP - {{ service.name }} - {{ port }}
|
||||||
|
check_command check_http!-H {{ service.name }} -p {{ port }} -f sticky
|
||||||
|
host_name {{ host }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkhttps" %}
|
||||||
|
{% for port in service.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description HTTPS - {{ service.name }} - {{ port }}
|
||||||
|
check_command check_http!--ssl -H {{ service.name }} -p {{ port }} -f sticky
|
||||||
|
host_name {{ host }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checktcp" %}
|
||||||
|
{% for port in service.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description TCP {{ service.name }} - {{ port }}
|
||||||
|
check_command check_tcp!{{ port }}
|
||||||
|
host_name {{ host }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
@ -182,6 +182,55 @@ define host {
|
|||||||
# Created: {{ service.value.created }}
|
# Created: {{ service.value.created }}
|
||||||
# Updated: {{ service.value.last_updated }}
|
# Updated: {{ service.value.last_updated }}
|
||||||
{% for tag in service.value.tags %}
|
{% for tag in service.value.tags %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkminecraft" %}
|
||||||
|
{% for port in service.value.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description Minecraft - {{ service.value.name }} - {{ port }}
|
||||||
|
check_command check_by_ssh!/usr/local/bin/monitoring-scripts/check_minecraft -H {{ host_name }} -p {{ port }} -m "{{ service.value.description }}" -f -w 3 -c 5
|
||||||
|
host_name {{ host_name }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkhttp" %}
|
||||||
|
{% for port in service.value.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description HTTP - {{ service.value.name }} - {{ port }}
|
||||||
|
check_command check_http!-H {{ service.value.name }} -p {{ port }} -f sticky
|
||||||
|
host_name {{ host_name }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checkhttps" %}
|
||||||
|
{% for port in service.value.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description HTTPS - {{ service.value.name }} - {{ port }}
|
||||||
|
check_command check_http!--ssl -H {{ service.value.name }} -p {{ port }} -f sticky
|
||||||
|
host_name {{ host_name }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
|
{% if tag.slug == "nagios-checktcp" %}
|
||||||
|
{% for port in service.value.ports %}
|
||||||
|
define service {
|
||||||
|
use ansible-generic-service
|
||||||
|
service_description TCP {{ service.value.name }} - {{ port }}
|
||||||
|
check_command check_tcp!{{ port }}
|
||||||
|
host_name {{ host_name }}
|
||||||
|
contact_groups ansible
|
||||||
|
}
|
||||||
|
{% endfor %}
|
||||||
|
{% endif %}
|
||||||
|
{# #}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
@ -20,19 +20,11 @@ scrape_configs:
|
|||||||
{% for host in groups['tags_nagios'] %}
|
{% for host in groups['tags_nagios'] %}
|
||||||
{% for service in vars.services %}
|
{% for service in vars.services %}
|
||||||
{% for tag in service.tags %}
|
{% for tag in service.tags %}
|
||||||
{# #}
|
|
||||||
{% if tag.slug == "nagios-checkhttps" %}
|
{% if tag.slug == "nagios-checkhttps" %}
|
||||||
{% for port in service.ports %}
|
{% for port in service.ports %}
|
||||||
- "https://{{ service.name }}:{{ port }}"
|
- "https://{{ service.name }}:{{ port }}"
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{# #}
|
|
||||||
{% if tag.slug == "nagios-checkmatrix" %}
|
|
||||||
{% for port in service.ports %}
|
|
||||||
- "https://{{ service.name }}:{{ port }}/health"
|
|
||||||
{% endfor %}
|
|
||||||
{% endif %}
|
|
||||||
{# #}
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
Loading…
Reference in New Issue
Block a user